You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
3.4 KiB
140 lines
3.4 KiB
|
|
import re |
|
|
|
|
|
class DLexToken:
    """
    A single token produced by DLexer.GetToken.

    DLexToken contains:
    'id'         - the ID of the token (match with the value DLexer.AddToken
                   returns, or with DLexer.GetErrorTokenID()).
    'val'        - the token's string.
    'lineNumber' - the line the token was encountered on.
    """

    # Class-level defaults so that every documented attribute can be read even
    # on a token whose producer has not filled it in (instead of raising
    # AttributeError).
    id = None
    val = None
    lineNumber = None

    def __repr__( self ):
        # Debug-friendly representation showing all three documented fields.
        return 'DLexToken(id=%r, val=%r, lineNumber=%r)' % ( self.id, self.val, self.lineNumber )
|
|
|
|
|
class DLexState:
    """
    Plain attribute bag holding a snapshot of a DLexer's read position.

    DLexer.BackupState fills in 'lineNumber', 'currentCharacter' and 'fileLen';
    DLexer.RestoreState copies those three values back into the lexer.
    """
|
|
|
|
|
class DLexer:
    """
    DLexer is a simple regular-expression based lexer. Here is how to use it.

    1. Call AddToken to add the regular expressions that it will parse. Add them in
       order of precedence. Store the value returned from AddToken so you can compare
       it to the token ID returned by GetToken to determine what kind of token was found.

    2. Call BeginRead or BeginReadFile to set up the input.

    3. Repeatedly call GetToken.
       If it returns None, the end of the input was reached.
       Otherwise it returns a DLexToken. If the token's 'id' equals
       GetErrorTokenID(), no registered expression matched and 'val' holds the
       whole unconsumed remainder of the input.
    """

    def __init__( self, bSkipWhitespace=1 ):
        """
        bSkipWhitespace - when true (the default), GetToken skips spaces, tabs,
                          form feeds, vertical tabs and line breaks before
                          matching, and line breaks advance the line number.
        """
        self.__tokens = []
        self.__curTokenID = 0
        self.__notnewline = re.compile( r'[^\r\n]*' )

        self.__bSkipWhitespace = bSkipWhitespace
        if bSkipWhitespace:
            self.__whitespace = re.compile( r'[ \t\f\v]+' )
            # '\r\n' is listed first so that a CRLF pair counts as ONE line break.
            self.__newline = re.compile( r'\r\n|\r|\n' )

        # Start out on an empty input so that the query methods are safe to call
        # before BeginRead / BeginReadFile.
        self.__curString = ''
        self.__lineNumber = 1
        self.__currentCharacter = 0
        self.__fileLen = 0

    def GetErrorTokenID( self ):
        """Return the token ID GetToken uses when no expression matches (-1)."""
        return -1

    def AddToken( self, expr, flags=0 ):
        """
        Register a token expression and return its ID.

        expr  - regular expression source, compiled with re.compile.
        flags - optional 're' flags passed to re.compile.

        IDs are handed out sequentially starting at 0. Expressions are tried in
        the order they were added, so add them in order of precedence.
        NOTE: an expression that can match the empty string yields a zero-length
        token without advancing the read position.
        """
        tokenID = self.__curTokenID
        self.__tokens.append( [tokenID, re.compile( expr, flags )] )
        self.__curTokenID += 1
        return tokenID

    # Store and restore the state.
    def BackupState( self ):
        """
        Return a DLexState snapshot of the line number, read position and input
        length. The input string itself is not stored, so a snapshot is only
        meaningful for the input it was taken from.
        """
        ret = DLexState()
        ret.lineNumber = self.__lineNumber
        ret.currentCharacter = self.__currentCharacter
        ret.fileLen = self.__fileLen
        return ret

    def RestoreState( self, state ):
        """Rewind (or fast-forward) to a snapshot returned by BackupState."""
        self.__lineNumber = state.lineNumber
        self.__currentCharacter = state.currentCharacter
        self.__fileLen = state.fileLen

    def BeginRead( self, str ):
        """Start lexing the given string from its first character, at line 1."""
        self.__curString = str
        self.__lineNumber = 1
        self.__currentCharacter = 0
        self.__fileLen = len( str )

    def BeginReadFile( self, fileName ):
        """Read the whole file 'fileName' (text mode) and start lexing its contents."""
        inputFile = open( fileName, 'r' )
        try:
            contents = inputFile.read()
        finally:
            # Close the handle even when read() raises.
            inputFile.close()
        self.BeginRead( contents )

    def GetToken( self ):
        """
        Return the next token as a DLexToken, or None at the end of the input.

        The first registered expression that matches at the current position
        wins. If nothing matches and input remains, a message is printed and an
        error token (id == GetErrorTokenID()) carrying the rest of the input is
        returned; the read position then jumps to the end of the input.

        Line numbers only advance while skipping whitespace, so line breaks
        inside a matched token are not counted.
        """
        # Skip whitespace.
        self.__SkipWhitespace()

        # Now return the first token that we have a match for.
        for tokenID, pattern in self.__tokens:
            m = pattern.match( self.__curString, self.__currentCharacter )
            if m:
                ret = DLexToken()
                ret.id = tokenID
                ret.val = m.group( 0 )
                ret.lineNumber = self.__lineNumber
                self.__currentCharacter = m.end()
                return ret

        if self.__currentCharacter < self.__fileLen:
            # Single parenthesised argument: valid as a Python 2 print statement
            # and as a Python 3 print() call, with identical output.
            print( "NO MATCH FOR '%s'" % self.__curString[ self.__currentCharacter : self.__currentCharacter+35 ] )
            ret = DLexToken()
            ret.id = self.GetErrorTokenID()
            ret.val = self.__curString[ self.__currentCharacter : ]
            # Error tokens report where lexing stopped, like regular tokens do.
            ret.lineNumber = self.__lineNumber
            self.__currentCharacter = self.__fileLen
            return ret

        return None

    def GetLineNumber( self ):
        """Return the current 1-based line number."""
        return self.__lineNumber

    def GetPercentComplete( self ):
        """
        Return how much of the input has been consumed, as an integer 0..100.
        An empty input counts as fully consumed.
        """
        if not self.__fileLen:
            return 100
        # Floor division keeps the result an integer on Python 2 and Python 3.
        return ( self.__currentCharacter * 100 ) // self.__fileLen

    def GetLineContents( self ):
        """
        Return the text from the current read position up to (not including)
        the next line break; "" when positioned at a line break or at the end.
        """
        # '[^\r\n]*' can match the empty string, so match() never returns None.
        return self.__notnewline.match( self.__curString, self.__currentCharacter ).group( 0 )

    def __SkipWhitespace( self ):
        """
        Advance past whitespace and line breaks, bumping the line number once per
        line break. Does nothing when whitespace skipping is disabled.
        """
        if not self.__bSkipWhitespace:
            return

        while 1:
            m = self.__whitespace.match( self.__curString, self.__currentCharacter )
            if m:
                self.__currentCharacter = m.end()
                continue

            m = self.__newline.match( self.__curString, self.__currentCharacter )
            if not m:
                break

            self.__currentCharacter = m.end()
            self.__lineNumber += 1
|
|
|
|