Velocity Reviews - Computer Hardware Reviews

Velocity Reviews > Newsgroups > Programming > Python > Error checking using regex ?

Reply
Thread Tools

Error checking using regex ?

 
 
Guy Robinson
Guest
Posts: n/a
 
      06-08-2004
I have the code below which parses an expression string and creates tokens.

Can anyone suggest the best way of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy

code:

import re
import time

# Token patterns, tried left to right by the alternation compiled below.
# Raw strings keep every backslash for the regex engine.  The exponent marker
# is written as a plain ``E``: ``\E`` is not a defined regex escape and is
# rejected by current versions of the ``re`` module.
re_par = r'[()]'                             # a single parenthesis
re_num = r'[0-9]*\.?[0-9]+E?[0-9]*'          # 12, .5, 3.14, 6.02E23
re_opr = r'[*/+\-^]'                         # one arithmetic operator
re_cns = r'PI'                               # named constant
re_trg = r'SIN|COS|TAN|ASIN|ACOS|ATAN|SGN'   # built-in function names
re_var = r'[a-z_0-9\s]*\.?[a-z_0-9\s]*'      # name or name.attr, may be empty

# The whole alternation is one named group, so ``recom.split()`` returns the
# matched tokens interleaved with whatever text lay between them.
recom = re.compile(
    '(?P<token>%s|%s|%s|%s|%s|%s)'
    % (re_par, re_num, re_opr, re_cns, re_trg, re_var),
    re.VERBOSE | re.IGNORECASE)

def test(str):
    """Split the expression ``str`` into tokens with ``recom`` and print them.

    Each piece returned by ``recom.split`` is stripped of surrounding
    whitespace and blank pieces are dropped.  The resulting list is printed;
    if tokenizing raises, a generic error message is printed instead.
    Nothing is returned.
    """
    try:
        output = [piece.strip() for piece in recom.split(str) if piece.strip()]
    except Exception:
        # Best-effort driver: report the failure and carry on, but do not
        # swallow KeyboardInterrupt/SystemExit the way a bare except would.
        print('error of some kind')
    else:
        # Single-argument print(...) behaves the same as a print statement
        # and as a print function.
        print(output)

class stopwatch:
    """Minimal wall-clock timer built on ``time.time()``."""

    def __init__(self):
        # Give ``t`` a value up front so stop() cannot fail with an
        # AttributeError when start() was never called.  0 is the same
        # "not running" value that stop() resets it to.
        self.t = 0

    def start(self):
        """Remember the current time and return a status message."""
        self.t = time.time()
        return 'starting timer'

    def stop(self):
        """Return a message with the seconds since ``self.t`` and reset it.

        ``self.t`` is 0 unless start() has been called since the last
        stop(), in which case the reported figure is simply ``time.time()``.
        """
        rstr = 'stopped at %f seconds' % (time.time() - self.t)
        self.t = 0
        return rstr

# Time the whole tokenizer run.  print(...) with a single argument works
# both as a print statement and as a print function.
e = stopwatch()
print(e.start())
test( "9" )
test( "9 + 3 + 6" )
test( "9 + 3 / 11" )
test( "( 9 + 3)" )
test( "(9+3) / 11" )
test( "9 - 12 - 6" )
test( "-9 - (12 - 6)" )
test( "2*3.14159" )
test( "3.1415926535*3.1415926535 / 10" )
test( "PI * PI / 10" )
test( "PI*PI/10" )
test( "PI^2" )
test( "6.02E23 * 8.048" )
test( "sin(PI/2)" )
test( "2^3^2" )
test( "2^9" )
test( "sgn(-2)" )
test( "sgn(0)" )
test( "sgn(0.1)" )
test( "ff*2" )
test( "ff*g g/2" )
test( "ff*2/dd.r r")
test( "5*4+300/(5-2)*(6+4)+4" )
test( "((5*4+300)/(5-2))*(6+4)+4" )
test( "(320/3)*10+4" )

#now test error expressions

test( "ff*2/dd.r..ss r") #additional ..ss and whitespace -invalid variable
test( "ff*$24..55/ddr") #double .. -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable
#test( "ff*((w.w+3)-2") #no closing parentheses-to be tested when evaluating expression

print(e.stop())
 
Reply With Quote
 
 
 
 
Heiko Wundram
Guest
Posts: n/a
 
      06-08-2004
Am Dienstag, 8. Juni 2004 13:26 schrieb Guy Robinson:
> I have the code below which parses an expression string and creates tokens.


You cannot parse expressions using regular expressions, nor check them
for errors, as the languages described by regular expressions are not
"intelligent" enough to match braces (read any primer on complexity
theory: you need a machine with state, such as a deterministic stack
machine, to check for matching braces).

Your best bet to be able to check an expression, and also to be able to parse
it, is to write a context free grammar for your syntax, try to parse the
string you're evaluating, and in case parsing fails, to complain that the
expression is invalid. If you're parsing Python expressions, your best bet is
to call functions from the compile module (which create a code object from a
Python expression which is callable using exec).

HTH!

Heiko.

 
Reply With Quote
 
 
 
 
Paul McGuire
Guest
Posts: n/a
 
      06-08-2004
"Guy Robinson" <(E-Mail Removed)-e-d.co.nz> wrote in message
news:ca47pc$e11$(E-Mail Removed)...
> I have the code below which parses an expression string and creates

tokens.
>
> Can anyone suggest the best of error checking for things like:
>
> Valid variable only obj.attribute -whitespace allowed
>
> test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
> test( "ff*$24..55/ddr") #double .. and $ -invalid number
> test( "ff*2/dd.r.ss r") #variable with double . -invalid variable
>
> I can't see an efficient way of doing this so any suggestions appreciated.
>
> TIA,
>
> Guy
>

<snip>

Guy -

Well, I recognize the test cases from an example that I include with
pyparsing. Are you trying to add support for variables to that example? If
so, here is the example, modified to support assignments to variables.

-- Paul

============================
# minimath.py (formerly fourfn.py)
#
# Demonstration of the parsing module, implementing a simple 4-function
# expression parser, with support for scientific notation, and symbols
# for e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended to add variable assignment, storage, and evaluation, and
# Python-like comments.
#
# Copyright 2003,2004 by Paul McGuire
#
import math

from pyparsing import (
    Literal, CaselessLiteral, Word, Combine, Group, Optional, ZeroOrMore,
    OneOrMore, Forward, nums, alphas, restOfLine, delimitedList)

# Values stored by assignment expressions, keyed by variable name.
variables = {}
# Tokens pushed by the parse actions, in postfix order.
exprStack = []

def pushFirst(str, loc, toks):
    """Parse action: append the first matched token to ``exprStack``.

    Takes the ``(str, loc, toks)`` arguments of a parse action; only
    ``toks`` is used.  When ``toks`` is empty nothing is pushed.

    Returns ``toks`` unchanged.
    """
    if toks:
        exprStack.append(toks[0])
    return toks

def assignVar(str, loc, toks):
    """Parse action for an assignment: evaluate the stack and store the value.

    The result of ``evaluateStack(exprStack)`` is stored in ``variables``
    under ``toks[0]``.  ``toks[0]`` is then pushed onto the stack via
    ``pushFirst``, so a following evaluation of the stack looks that name up
    again.  Returns None.
    """
    variables[toks[0]] = evaluateStack(exprStack)
    pushFirst(str, loc, toks)


bnf = None  # grammar cache, filled in by the first BNF() call


def BNF():
    """Return the expression grammar, building it on the first call.

    The grammar is stored in the module-level ``bnf`` and reused afterwards.
    It accepts an optional assignment (``name = expr``) or expression made of
    numbers, ``PI``, ``E``, dotted identifiers, one-argument function calls,
    parentheses and the operators ``+ - * / ^``, and ignores ``#`` comments.
    Matched atoms and operators are pushed onto ``exprStack`` by the
    ``pushFirst`` parse action; assignments are handled by ``assignVar``.
    """
    global bnf
    # Compare against None explicitly so the cache test does not depend on
    # the truth value of a parser object.
    if bnf is None:
        point = Literal( "." )
        e = CaselessLiteral( "E" )
        fnumber = Combine( Word( "+-"+nums, nums ) +
                           Optional( point + Optional( Word( nums ) ) ) +
                           Optional( e + Word( "+-"+nums, nums ) ) )
        ident = Word(alphas, alphas+nums+"_$")
        varident = delimitedList(ident,".",combine=True)

        plus = Literal( "+" )
        minus = Literal( "-" )
        mult = Literal( "*" )
        div = Literal( "/" )
        lpar = Literal( "(" ).suppress()
        rpar = Literal( ")" ).suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal( "^" )
        pi = CaselessLiteral( "PI" )

        # expr is referenced by atom and factor before it is defined below.
        expr = Forward()
        atom = ( ( pi | e | fnumber | ident + lpar + expr + rpar |
                   varident ).setParseAction( pushFirst ) |
                 ( lpar + expr.suppress() + rpar ) )
        factor = atom + ZeroOrMore(
            ( expop + expr ).setParseAction( pushFirst ) )
        term = factor + ZeroOrMore(
            ( multop + factor ).setParseAction( pushFirst ) )
        expr << term + ZeroOrMore(
            ( addop + term ).setParseAction( pushFirst ) )
        assignment = (varident + "=" + expr).setParseAction( assignVar )

        bnf = Optional( assignment | expr )

        comment = "#" + restOfLine
        bnf.ignore(comment)

    return bnf

# map operator symbols to corresponding arithmetic operations
# map operator symbols to corresponding arithmetic operations
opn = {
    "+": (lambda a, b: a + b),
    "-": (lambda a, b: a - b),
    "*": (lambda a, b: a * b),
    "/": (lambda a, b: a / b),
    "^": (lambda a, b: a ** b),
}

# map function names to their one-argument implementations
fn = {
    "sin": math.sin,
    "cos": math.cos,
    "tan": math.tan,
    "abs": abs,
    "trunc": (lambda a: int(a)),
    # Round half up.  floor() is needed because int() truncates toward zero,
    # so int(a + 0.5) turned -2.7 into -2 instead of -3.
    "round": (lambda a: int(math.floor(a + 0.5))),
    # -1, 0 or 1 according to the sign of a.
    "sgn": (lambda a: (a > 0) - (a < 0)),
}
def evaluateStack(s):
    """Evaluate the postfix token stack ``s`` and return the result.

    Tokens are popped from the end of ``s``, so the list is consumed:

    * an operator (a key of ``opn``) evaluates its right operand first, then
      its left operand, and applies the operation;
    * ``"PI"`` and ``"E"`` return fixed constants;
    * any other token starting with a letter is looked up in ``variables``
      first, otherwise treated as a function name in ``fn`` and applied to
      the next evaluated value (an unknown name raises KeyError);
    * anything else is converted with ``float``.

    An empty stack evaluates to 0.0.
    """
    if not s:
        return 0.0
    op = s.pop()
    # Look the token up in the operator table instead of testing whether it
    # is a substring of "+-*/^", which would also accept e.g. "" or "+-".
    if op in opn:
        op2 = evaluateStack(s)
        op1 = evaluateStack(s)
        return opn[op](op1, op2)
    if op == "PI":
        return 3.1415926535
    if op == "E":
        return 2.718281828
    if op[0].isalpha():
        # A stored variable takes precedence over a function of that name.
        if op in variables:
            return variables[op]
        fnarg = evaluateStack(s)
        return fn[op](fnarg)
    return float(op)

if __name__ == "__main__":

    def test(str):
        """Parse ``str`` and print the parse results, the stack and the value."""
        global exprStack
        exprStack = []
        results = BNF().parseString(str)
        # evaluateStack() pops tokens off the stack, so capture its text
        # first; the output then shows the stack as it was before evaluation.
        stack_text = repr(exprStack)
        value = evaluateStack(exprStack)
        # One pre-formatted argument keeps print(...) valid both as a print
        # statement and as a print function.
        print("%s -> %s => %s = %s" % (str, results, stack_text, value))

    test( "9" )
    test( "9 + 3 + 6" )
    test( "9 + 3 / 11" )
    test( "(9 + 3)" )
    test( "(9+3) / 11" )
    test( "9 - 12 - 6" )
    test( "9 - (12 - 6)" )
    test( "2*3.14159" )
    test( "3.1415926535*3.1415926535 / 10" )
    test( "PI * PI / 10" )
    test( "PI*PI/10" )
    test( "PI^2" )
    test( "6.02E23 * 8.048" )
    test( "e / 3" )
    test( "sin(PI/2)" )
    test( "trunc(E)" )
    test( "E^PI" )
    test( "2^3^2" )
    test( "2^9" )
    test( "sgn(-2)" )
    test( "sgn(0)" )
    test( "sgn(0.1)" )
    test( "5*4+300/(5-2)*(6+4)+4" )
    test( "((5*4+301)/(5-2))*(6+4)+4" )
    test( "(321/3)*10+4" )
    test( "# nothing but comments" )
    test( "a = 2^10" )
    test( "a^0.1 # same as 10th root of 1024" )
    test( "c = a" )
    test( "b=a" )
    test( "b-c" )


 
Reply With Quote
 
Guy Robinson
Guest
Posts: n/a
 
      06-08-2004
Hi Paul,

Yep, they're your examples. I'm using this as a learning experience and have
looked at your code, but I have specific requirements for integration
into another application.

I'm using the regex to create a list of tokens to be processed into a
postfix processing string. This is then offloaded to another class that
processes the string for each database row.

The speed to generate the postfix string isn't important. But the speed
to process for each database row is.

Guy

> "Guy Robinson" <(E-Mail Removed)-e-d.co.nz> wrote in message
> news:ca47pc$e11$(E-Mail Removed)...
>
>>I have the code below which parses an expression string and creates

>
> tokens.
>
>>Can anyone suggest the best of error checking for things like:
>>
>>Valid variable only obj.attribute -whitespace allowed
>>
>>test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
>>test( "ff*$24..55/ddr") #double .. and $ -invalid number
>>test( "ff*2/dd.r.ss r") #variable with double . -invalid variable
>>
>>I can't see an efficient way of doing this so any suggestions appreciated.
>>
>>TIA,
>>
>>Guy
>>

>
> <snip>
>
> Guy -
>
> Well, I recognize the test cases from an example that I include with
> pyparsing. Are you trying to add support for variables to that example? If
> so, here is the example, modified to support assignments to variables.
>
> -- Paul
>
> ============================
> # minimath.py (formerly fourfn.py)
> #
> # Demonstration of the parsing module, implementing a simple 4-function
> expression parser,
> # with support for scientific notation, and symbols for e and pi.
> # Extended to add exponentiation and simple built-in functions.
> # Extended to add variable assignment, storage, and evaluation, and
> Python-like comments.
> #
> # Copyright 2003,2004 by Paul McGuire
> #
> from pyparsing import
> Literal,CaselessLiteral,Word,Combine,Group,Optiona l,ZeroOrMore,OneOrMore,For
> ward,nums,alphas,restOfLine,delimitedList
> import math
>
> variables = {}
> exprStack = []
>
> def pushFirst( str, loc, toks ):
> global exprStack
> if toks:
> exprStack.append( toks[0] )
> return toks
>
> def assignVar( str, loc, toks ):
> global exprStack
> global variables
> variables[ toks[0] ] = evaluateStack( exprStack )
> pushFirst(str,loc,toks)
>
>
> bnf = None
> def BNF():
> global bnf
> if not bnf:
> point = Literal( "." )
> e = CaselessLiteral( "E" )
> fnumber = Combine( Word( "+-"+nums, nums ) +
> Optional( point + Optional( Word( nums ) ) ) +
> Optional( e + Word( "+-"+nums, nums ) ) )
> ident = Word(alphas, alphas+nums+"_$")
> varident = delimitedList(ident,".",combine=True)
>
> plus = Literal( "+" )
> minus = Literal( "-" )
> mult = Literal( "*" )
> div = Literal( "/" )
> lpar = Literal( "(" ).suppress()
> rpar = Literal( ")" ).suppress()
> addop = plus | minus
> multop = mult | div
> expop = Literal( "^" )
> pi = CaselessLiteral( "PI" )
>
> expr = Forward()
> atom = ( pi | e | fnumber | ident + lpar + expr + rpar |
> varident ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )
> factor = atom + ZeroOrMore( ( expop + expr ).setParseAction(
> pushFirst ) )
> term = factor + ZeroOrMore( ( multop + factor ).setParseAction(
> pushFirst ) )
> expr << term + ZeroOrMore( ( addop + term ).setParseAction(
> pushFirst ) )
> assignment = (varident + "=" + expr).setParseAction( assignVar )
>
> bnf = Optional( assignment | expr )
>
> comment = "#" + restOfLine
> bnf.ignore(comment)
>
> return bnf
>
> # map operator symbols to corresponding arithmetic operations
> opn = { "+" : ( lambda a,b: a + b ),
> "-" : ( lambda a,b: a - b ),
> "*" : ( lambda a,b: a * b ),
> "/" : ( lambda a,b: a / b ),
> "^" : ( lambda a,b: a ** b ) }
> fn = { "sin" : math.sin,
> "cos" : math.cos,
> "tan" : math.tan,
> "abs" : abs,
> "trunc" : ( lambda a: int(a) ),
> "round" : ( lambda a: int(a+0.5) ),
> "sgn" : ( lambda a: ( (a<0 and -1) or (a>0 and 1) or 0 ) ) }
> def evaluateStack( s ):
> global variables
> if not s: return 0.0
> op = s.pop()
> if op in "+-*/^":
> op2 = evaluateStack( s )
> op1 = evaluateStack( s )
> return opn[op]( op1, op2 )
> elif op == "PI":
> return 3.1415926535
> elif op == "E":
> return 2.718281828
> elif op[0].isalpha():
> if op in variables:
> return variables[op]
> fnarg = evaluateStack( s )
> return (fn[op])( fnarg )
> else:
> return float( op )
>
> if __name__ == "__main__":
>
> def test( str ):
> global exprStack
> exprStack = []
> results = BNF().parseString( str )
> print str, "->", results, "=>", exprStack, "=", evaluateStack(
> exprStack )
>
> test( "9" )
> test( "9 + 3 + 6" )
> test( "9 + 3 / 11" )
> test( "(9 + 3)" )
> test( "(9+3) / 11" )
> test( "9 - 12 - 6" )
> test( "9 - (12 - 6)" )
> test( "2*3.14159" )
> test( "3.1415926535*3.1415926535 / 10" )
> test( "PI * PI / 10" )
> test( "PI*PI/10" )
> test( "PI^2" )
> test( "6.02E23 * 8.048" )
> test( "e / 3" )
> test( "sin(PI/2)" )
> test( "trunc(E)" )
> test( "E^PI" )
> test( "2^3^2" )
> test( "2^9" )
> test( "sgn(-2)" )
> test( "sgn(0)" )
> test( "sgn(0.1)" )
> test( "5*4+300/(5-2)*(6+4)+4" )
> test( "((5*4+301)/(5-2))*(6+4)+4" )
> test( "(321/3)*10+4" )
> test( "# nothing but comments" )
> test( "a = 2^10" )
> test( "a^0.1 # same as 10th root of 1024" )
> test( "c = a" )
> test( "b=a" )
> test( "b-c" )
>
>

 
Reply With Quote
 
 
 
Reply

Thread Tools

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off
Trackbacks are On
Pingbacks are On
Refbacks are Off


Similar Threads
Thread Thread Starter Forum Replies Last Post
How make regex that means "contains regex#1 but NOT regex#2" ?? seberino@spawar.navy.mil Python 3 07-01-2008 03:06 PM
String Pattern Matching: regex and Python regex documentation Xah Lee Java 1 09-22-2006 07:11 PM
Is ASP Validator Regex Engine Same As VS2003 Find Regex Engine? =?Utf-8?B?SmViQnVzaGVsbA==?= ASP .Net 2 10-22-2005 02:43 PM
Java regex imposture re: Perl regex compatibility a_c_Attlee@yahoo.com Java 2 05-06-2005 12:16 AM
perl regex to java regex Rick Venter Java 5 11-06-2003 10:55 AM



Advertisments