fonttools/Lib/fontTools/feaLib/lexer.py

from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals


class LexerError(Exception):
    def __init__(self, message, location):
        Exception.__init__(self, message)
        self.location = location


class Lexer:
    NUMBER = "NUMBER"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ";:@-+'{}[]<>()"
    CHAR_DIGIT_ = "0123456789"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_."

    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        while True:
            token_type, token, location = self.next_()
            if token_type not in {Lexer.COMMENT, Lexer.NEWLINE}:
                return (token_type, token, location)

    def next_(self):
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        column = self.pos_ - self.line_start_ + 1
        location = (self.filename_, self.line_, column)
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start:self.pos_], location)

        if self.mode_ is Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise LexerError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise LexerError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

        if cur_char == "\\" and next_char in Lexer.CHAR_DIGIT_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            if token == "include":
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"\r\n')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                return (Lexer.STRING, text[start + 1:self.pos_ - 1], location)
            else:
                raise LexerError("Expected '\"' to terminate string", location)
        raise LexerError("Unexpected character: '%s'" % cur_char, location)

    def scan_over_(self, valid):
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00			`from __future__ import print_function, division, absolute_import`
			`from __future__ import unicode_literals`


			`class LexerError(Exception):`
			`def __init__(self, message, location):`
			`Exception.__init__(self, message)`
			`self.location = location`


			`class Lexer:`
			`NUMBER = "NUMBER"`
			`STRING = "STRING"`
			`NAME = "NAME"`
Support FILENAME tokens when lexing OpenType feature files When the lexer encounters the "include" keyword, it now enters a special mode for scanning file names. After having scanned over the file name, the lexer goes back to normal. The exact format of file name strings is not defined by the OpenType feature file specification, so we accept any character that is not a closing parenthesis. 2015-07-31 15:20:40 +02:00			`FILENAME = "FILENAME"`
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00			`CID = "CID"`
			`SYMBOL = "SYMBOL"`
			`COMMENT = "COMMENT"`
			`NEWLINE = "NEWLINE"`

			`CHAR_WHITESPACE_ = " \t"`
			`CHAR_NEWLINE_ = "\r\n"`
			`CHAR_SYMBOL_ = ";:@-+'{}[]<>()"`
			`CHAR_DIGIT_ = "0123456789"`
			`CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`
			`CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\"`
			`CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_."`

Support FILENAME tokens when lexing OpenType feature files When the lexer encounters the "include" keyword, it now enters a special mode for scanning file names. After having scanned over the file name, the lexer goes back to normal. The exact format of file name strings is not defined by the OpenType feature file specification, so we accept any character that is not a closing parenthesis. 2015-07-31 15:20:40 +02:00			`MODE_NORMAL_ = "NORMAL"`
			`MODE_FILENAME_ = "FILENAME"`

Store file path in Lexer This simplifies the implementation of the parser for OpenType feature files, since it can now just keep token locations returned by the lexer. Before this change, the parser had to un-pack the location tuples and build new tuples that included the file path. 2015-07-31 14:35:54 +02:00			`def __init__(self, text, filename):`
			`self.filename_ = filename`
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00			`self.line_ = 1`
			`self.pos_ = 0`
			`self.line_start_ = 0`
			`self.text_ = text`
			`self.text_length_ = len(text)`
Support FILENAME tokens when lexing OpenType feature files When the lexer encounters the "include" keyword, it now enters a special mode for scanning file names. After having scanned over the file name, the lexer goes back to normal. The exact format of file name strings is not defined by the OpenType feature file specification, so we accept any character that is not a closing parenthesis. 2015-07-31 15:20:40 +02:00			`self.mode_ = Lexer.MODE_NORMAL_`
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00
			`def __iter__(self):`
			`return self`

			`def next(self): # Python 2`
			`return self.__next__()`

			`def __next__(self): # Python 3`
			`while True:`
			`token_type, token, location = self.next_()`
			`if token_type not in {Lexer.COMMENT, Lexer.NEWLINE}:`
			`return (token_type, token, location)`

			`def next_(self):`
			`self.scan_over_(Lexer.CHAR_WHITESPACE_)`
			`column = self.pos_ - self.line_start_ + 1`
Store file path in Lexer This simplifies the implementation of the parser for OpenType feature files, since it can now just keep token locations returned by the lexer. Before this change, the parser had to un-pack the location tuples and build new tuples that included the file path. 2015-07-31 14:35:54 +02:00			`location = (self.filename_, self.line_, column)`
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00			`start = self.pos_`
			`text = self.text_`
			`limit = len(text)`
			`if start >= limit:`
			`raise StopIteration()`
			`cur_char = text[start]`
			`next_char = text[start + 1] if start + 1 < limit else None`
Support FILENAME tokens when lexing OpenType feature files When the lexer encounters the "include" keyword, it now enters a special mode for scanning file names. After having scanned over the file name, the lexer goes back to normal. The exact format of file name strings is not defined by the OpenType feature file specification, so we accept any character that is not a closing parenthesis. 2015-07-31 15:20:40 +02:00
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00			`if cur_char == "\n":`
			`self.pos_ += 1`
			`self.line_ += 1`
			`self.line_start_ = self.pos_`
			`return (Lexer.NEWLINE, None, location)`
			`if cur_char == "\r":`
			`self.pos_ += (2 if next_char == "\n" else 1)`
			`self.line_ += 1`
			`self.line_start_ = self.pos_`
			`return (Lexer.NEWLINE, None, location)`
Support FILENAME tokens when lexing OpenType feature files When the lexer encounters the "include" keyword, it now enters a special mode for scanning file names. After having scanned over the file name, the lexer goes back to normal. The exact format of file name strings is not defined by the OpenType feature file specification, so we accept any character that is not a closing parenthesis. 2015-07-31 15:20:40 +02:00			`if cur_char == "#":`
			`self.scan_until_(Lexer.CHAR_NEWLINE_)`
			`return (Lexer.COMMENT, text[start:self.pos_], location)`

			`if self.mode_ is Lexer.MODE_FILENAME_:`
			`if cur_char != "(":`
			`raise LexerError("Expected '(' before file name", location)`
			`self.scan_until_(")")`
			`cur_char = text[self.pos_] if self.pos_ < limit else None`
			`if cur_char != ")":`
			`raise LexerError("Expected ')' after file name", location)`
			`self.pos_ += 1`
			`self.mode_ = Lexer.MODE_NORMAL_`
			`return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)`

			`if cur_char == "\\" and next_char in Lexer.CHAR_DIGIT_:`
			`self.pos_ += 1`
			`self.scan_over_(Lexer.CHAR_DIGIT_)`
			`return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)`
			`if cur_char in Lexer.CHAR_NAME_START_:`
			`self.pos_ += 1`
			`self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)`
			`token = text[start:self.pos_]`
			`if token == "include":`
			`self.mode_ = Lexer.MODE_FILENAME_`
			`return (Lexer.NAME, token, location)`
Add lexer for OpenType feature files 2015-07-31 11:53:36 +02:00			`if cur_char in Lexer.CHAR_DIGIT_:`
			`self.scan_over_(Lexer.CHAR_DIGIT_)`
			`return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)`
			`if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_:`
			`self.pos_ += 1`
			`self.scan_over_(Lexer.CHAR_DIGIT_)`
			`return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)`
			`if cur_char in Lexer.CHAR_SYMBOL_:`
			`self.pos_ += 1`
			`return (Lexer.SYMBOL, cur_char, location)`
			`if cur_char == '"':`
			`self.pos_ += 1`
			`self.scan_until_('"\r\n')`
			`if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':`
			`self.pos_ += 1`
			`return (Lexer.STRING, text[start + 1:self.pos_ - 1], location)`
			`else:`
			`raise LexerError("Expected '\"' to terminate string", location)`
			`raise LexerError("Unexpected character: '%s'" % cur_char, location)`

			`def scan_over_(self, valid):`
			`p = self.pos_`
			`while p < self.text_length_ and self.text_[p] in valid:`
			`p += 1`
			`self.pos_ = p`

			`def scan_until_(self, stop_at):`
			`p = self.pos_`
			`while p < self.text_length_ and self.text_[p] not in stop_at:`
			`p += 1`
			`self.pos_ = p`