import re


class DLexToken:
    """
    A single token produced by DLexer.GetToken.

    Attributes (assigned by DLexer.GetToken):
        'id'         - the ID of the token (match with the value
                       DLexer.AddToken returns, or DLexer.GetErrorTokenID()
                       for unmatched input).
        'val'        - the token's string.
        'lineNumber' - the line the token was encountered on.
    """
    pass


class DLexState:
    """
    Snapshot of a DLexer read position, created by DLexer.BackupState and
    consumed by DLexer.RestoreState.

    Attributes: 'lineNumber', 'currentCharacter', 'fileLen'.
    """
    pass


class DLexer:
    """
    DLex is a simple lexer simulator. Here is how to use it.

    1. Call AddToken to add the regular expressions that it will parse.
       Add them in order of precedence. Store the value returned from
       AddToken so you can compare it to the token ID returned by GetToken
       to determine what kind of token was found.

    2. Call BeginRead or BeginReadFile to setup the initial input.

    3. Repeatedly call GetToken. If it returns None, then there is no more
       input. If it returns a value, then it is a DLexToken with 'id',
       'val' and 'lineNumber' set. Input that matches none of the registered
       expressions is returned as one final token whose id is
       GetErrorTokenID().
    """

    def __init__( self, bSkipWhitespace=1 ):
        """
        bSkipWhitespace - when true, spaces, tabs, form feeds, vertical tabs
                          and line breaks are skipped before each token.
        """
        self.__tokens = []
        self.__curTokenID = 0
        self.__bSkipWhitespace = bSkipWhitespace

        self.__notnewline = re.compile( r'[^\r\n]*' )
        self.__whitespace = re.compile( r'[ \t\f\v]+' )
        # '\r\n' is listed first so that a CRLF pair counts as ONE line break.
        self.__newline = re.compile( r'\r\n|\r|\n' )

        # Start with an empty input so the query methods are safe to call
        # before BeginRead / BeginReadFile.
        self.__curString = ''
        self.__lineNumber = 1
        self.__currentCharacter = 0
        self.__fileLen = 0

    def GetErrorTokenID( self ):
        """Return the token ID used for input that matches no expression."""
        return -1

    def AddToken( self, expr, flags=0 ):
        """
        Register a regular expression and return its token ID.

        expr  - the regular expression source.
        flags - optional 're' module flags passed to re.compile.

        Expressions are tried by GetToken in the order they were added.
        """
        tokenID = self.__curTokenID
        self.__tokens.append( [tokenID, re.compile( expr, flags )] )
        self.__curTokenID += 1
        return tokenID

    # Store and restore the state.
    def BackupState( self ):
        """Return a DLexState capturing the current read position."""
        ret = DLexState()
        ret.lineNumber = self.__lineNumber
        ret.currentCharacter = self.__currentCharacter
        ret.fileLen = self.__fileLen
        return ret

    def RestoreState( self, state ):
        """Rewind (or advance) to a position captured by BackupState."""
        self.__lineNumber = state.lineNumber
        self.__currentCharacter = state.currentCharacter
        self.__fileLen = state.fileLen

    def BeginRead( self, str ):
        """Start lexing the given string from its beginning (line 1)."""
        self.__curString = str
        self.__lineNumber = 1
        self.__currentCharacter = 0
        self.__fileLen = len( str )

    def BeginReadFile( self, fileName ):
        """Read the whole file 'fileName' and start lexing its contents."""
        # 'with' guarantees the handle is closed even if read() raises.
        with open( fileName, 'r' ) as f:
            contents = f.read()
        self.BeginRead( contents )

    def GetToken( self ):
        """
        Return the next DLexToken, or None when the input is exhausted.

        The first registered expression that matches at the current position
        wins. If nothing matches and input remains, a diagnostic is printed
        and the rest of the input is returned as a single error token
        (id == GetErrorTokenID()).

        NOTE: an expression that can match the empty string yields an empty
        token without advancing, so callers should avoid such expressions.
        NOTE: line breaks inside a matched token are not counted towards the
        line number; only line breaks skipped as whitespace are.
        """
        # Skip whitespace.
        self.__SkipWhitespace()

        text = self.__curString
        pos = self.__currentCharacter

        # Now return the first token that we have a match for.
        for tokenID, pattern in self.__tokens:
            m = pattern.match( text, pos )
            if m:
                ret = DLexToken()
                ret.id = tokenID
                ret.val = text[ m.start() : m.end() ]
                ret.lineNumber = self.__lineNumber
                self.__currentCharacter = m.end()
                return ret

        if pos < self.__fileLen:
            # Parenthesised single-argument print works on Python 2 and 3.
            print( "NO MATCH FOR '%s'" % text[ pos : pos+35 ] )
            ret = DLexToken()
            ret.id = self.GetErrorTokenID()
            ret.val = text[ pos : ]
            # Error tokens carry a line number just like regular tokens.
            ret.lineNumber = self.__lineNumber
            self.__currentCharacter = self.__fileLen
            return ret

        return None

    def GetLineNumber( self ):
        """Return the current 1-based line number."""
        return self.__lineNumber

    def GetPercentComplete( self ):
        """
        Return how much of the input has been consumed, as an integer
        percentage. Empty input is reported as 100 (nothing left to read).
        """
        if not self.__fileLen:
            return 100
        # '//' keeps the integer result on both Python 2 and Python 3.
        return (self.__currentCharacter * 100) // self.__fileLen

    def GetLineContents( self ):
        """
        Return the text from the current position up to (not including) the
        next line break.
        """
        m = self.__notnewline.match( self.__curString, self.__currentCharacter )
        if m:
            return self.__curString[ m.start() : m.end() ]
        else:
            return ""

    def __SkipWhitespace( self ):
        """
        Advance past whitespace and line breaks (when bSkipWhitespace was
        set), incrementing the line number once per line break.
        """
        if not self.__bSkipWhitespace:
            return

        while 1:
            a = self.__whitespace.match( self.__curString, self.__currentCharacter )
            if a:
                self.__currentCharacter = a.end()
                continue

            b = self.__newline.match( self.__curString, self.__currentCharacter )
            if b:
                self.__currentCharacter = b.end()
                self.__lineNumber += 1
                continue

            break