from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals

import codecs
import os


class LexerError(Exception):
    def __init__(self, message, location):
        Exception.__init__(self, message)
        self.location = location

    def __str__(self):
        message = Exception.__str__(self)
        if self.location:
            path, line, column = self.location
            return "%s:%d:%d: %s" % (path, line, column, message)
        else:
            return message


class Lexer(object):
    """Tokenizes feature-file text into (type, value, location) tuples,
    where location is (filename, line, column)."""

    # Token types.
    NUMBER = "NUMBER"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ";:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_."

    # After an "include" keyword, the lexer expects a parenthesized
    # file name instead of a normal token.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        # Skip comments and newlines; clients only see meaningful tokens.
        while True:
            token_type, token, location = self.next_()
            if token_type not in {Lexer.COMMENT, Lexer.NEWLINE}:
                return (token_type, token, location)

    def next_(self):
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        column = self.pos_ - self.line_start_ + 1
        location = (self.filename_, self.line_, column)
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":  # comment runs to the end of the line
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start:self.pos_], location)

        if self.mode_ is Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise LexerError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise LexerError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

        if cur_char == "\\" and next_char in Lexer.CHAR_DIGIT_:
            # Backslash-prefixed glyph identifier, e.g. \123 (a CID).
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
        if cur_char == "@":
            # @-prefixed glyph class name.
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1:self.pos_]
            if len(glyphclass) < 1:
                raise LexerError("Expected glyph class name", location)
            if len(glyphclass) > 30:
                raise LexerError(
                    "Glyph class names must not be longer than 30 characters",
                    location)
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            if token == "include":
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_:
            # Negative number; checked before CHAR_SYMBOL_, which contains "-".
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"\r\n')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                return (Lexer.STRING, text[start + 1:self.pos_ - 1], location)
            else:
                raise LexerError("Expected '\"' to terminate string", location)
        raise LexerError("Unexpected character: '%s'" % cur_char, location)

    def scan_over_(self, valid):
        # Advance pos_ while the current character is in `valid`.
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        # Advance pos_ until a character in `stop_at` is found, or EOF.
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p


class IncludingLexer(object):
    """Lexer that transparently follows "include(file);" statements,
    keeping a stack of nested lexers at most five levels deep."""

    def __init__(self, filename):
        self.lexers_ = [self.make_lexer_(filename, (filename, 0, 0))]

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = lexer.next()
            except StopIteration:
                self.lexers_.pop()
                continue
            if token_type is Lexer.NAME and token == "include":
                fname_type, fname_token, fname_location = lexer.next()
                if fname_type is not Lexer.FILENAME:
                    raise LexerError("Expected file name", fname_location)
                semi_type, semi_token, semi_location = lexer.next()
                if semi_type is not Lexer.SYMBOL or semi_token != ";":
                    raise LexerError("Expected ';'", semi_location)
                # Included paths are resolved relative to the including file.
                curpath, _ = os.path.split(lexer.filename_)
                path = os.path.join(curpath, fname_token)
                if len(self.lexers_) >= 5:
                    raise LexerError("Too many recursive includes",
                                     fname_location)
                self.lexers_.append(self.make_lexer_(path, fname_location))
                continue
            else:
                return (token_type, token, location)
        raise StopIteration()

    @staticmethod
    def make_lexer_(filename, location):
        try:
            with codecs.open(filename, "rb", "utf-8") as f:
                return Lexer(f.read(), filename)
        except IOError as err:
            raise LexerError(str(err), location)
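

if __name__ == "__main__":
    # A minimal usage sketch, assuming the lexer is fed OpenType feature-file
    # style syntax. The snippet and the "<demo>" filename below are
    # illustrative placeholders only.
    demo_text = (
        "# comments are skipped\n"
        "@UPPERCASE = [A B C];\n"
        "pos A B -20;\n"
    )
    for token_type, token, location in Lexer(demo_text, "<demo>"):
        path, line, column = location
        print("%s:%d:%d\t%s\t%r" % (path, line, column, token_type, token))
    # IncludingLexer("some.fea") could be iterated the same way; it reads the
    # (hypothetical) file from disk and follows include(...) statements.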