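# Parser for OpenType feature (.fea) files. This module handles languagesystem
# statements, glyph class definitions, and feature blocks whose bodies consist
# of glyph class definitions; see Parser.parse() below for the dispatch.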
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.feaLib.lexer import Lexer, IncludingLexer
import fontTools.feaLib.ast as ast
import os
import re


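# ParserError carries an optional (path, line, column) tuple so that error
# messages can point at the offending position in the feature file.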
class ParserError(Exception):
    def __init__(self, message, location):
        Exception.__init__(self, message)
        self.location = location

    def __str__(self):
        message = Exception.__str__(self)
        if self.location:
            path, line, column = self.location
            return "%s:%d:%d: %s" % (path, line, column, message)
        else:
            return message


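# Parser keeps one token of lookahead (next_token_*); advance_lexer_() shifts
# it into cur_token_* so the parse methods can peek before consuming.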
class Parser(object):
    def __init__(self, path):
        self.doc_ = ast.FeatureFile()
        self.glyphclasses_ = SymbolTable()
        self.valuerecords_ = SymbolTable()
        self.symbol_tables_ = [self.glyphclasses_, self.valuerecords_]
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.lexer_ = IncludingLexer(path)
        self.advance_lexer_()

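    # parse() is the entry point: it walks the token stream and appends the
    # resulting statements to the ast.FeatureFile document.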
    def parse(self):
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.GLYPHCLASS:
                glyphclass = self.parse_glyphclass_definition_()
                self.doc_.statements.append(glyphclass)
            elif self.is_cur_keyword_("languagesystem"):
                self.parse_languagesystem_()
            elif self.is_cur_keyword_("feature"):
                self.parse_feature_block_()
            else:
                raise ParserError("Expected languagesystem, feature, or "
                                  "glyph class definition",
                                  self.cur_token_location_)
        return self.doc_

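    # Parses a glyph class definition of the form "@NAME = <glyph class>;".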
    def parse_glyphclass_definition_(self):
        location, name = self.cur_token_location_, self.cur_token_
        self.expect_symbol_("=")
        glyphs = self.parse_glyphclass_reference_()
        self.expect_symbol_(";")
        if self.glyphclasses_.resolve(name) is not None:
            raise ParserError("Glyph class @%s already defined" % name,
                              location)
        glyphclass = ast.GlyphClassDefinition(location, name, glyphs)
        self.glyphclasses_.define(name, glyphclass)
        return glyphclass

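    # Parses either a reference to a named class ("@NAME") or a bracketed
    # list such as "[a b c]", which may mix glyph names, ranges ("a - d"),
    # and nested class references.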
    def parse_glyphclass_reference_(self):
        result = set()
        if self.next_token_type_ is Lexer.GLYPHCLASS:
            self.advance_lexer_()
            gc = self.glyphclasses_.resolve(self.cur_token_)
            if gc is None:
                raise ParserError("Unknown glyph class @%s" % self.cur_token_,
                                  self.cur_token_location_)
            result.update(gc.glyphs)
            return result

        self.expect_symbol_("[")
        while self.next_token_ != "]":
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.NAME:
                if self.next_token_ == "-":
                    range_location_ = self.cur_token_location_
                    range_start = self.cur_token_
                    self.expect_symbol_("-")
                    range_end = self.expect_name_()
                    result.update(self.make_glyph_range_(range_location_,
                                                         range_start,
                                                         range_end))
                else:
                    result.add(self.cur_token_)
            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                gc = self.glyphclasses_.resolve(self.cur_token_)
                if gc is None:
                    raise ParserError(
                        "Unknown glyph class @%s" % self.cur_token_,
                        self.cur_token_location_)
                result.update(gc.glyphs)
            else:
                raise ParserError("Expected glyph name, range, or reference",
                                  self.cur_token_location_)
        self.expect_symbol_("]")
        return result

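    # Parses "languagesystem <script> <language>;".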
    def parse_languagesystem_(self):
        assert self.cur_token_ == "languagesystem"
        location = self.cur_token_location_
        script, language = self.expect_tag_(), self.expect_tag_()
        self.expect_symbol_(";")
        langsys = ast.LanguageSystemStatement(location, script, language)
        self.doc_.statements.append(langsys)

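    # Parses "feature <tag> { ... } <tag>;". Glyph classes defined inside the
    # block live in a nested scope that is discarded at the closing brace.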
    def parse_feature_block_(self):
        assert self.cur_token_ == "feature"
        location = self.cur_token_location_
        tag = self.expect_tag_()

        self.expect_symbol_("{")
        for symtab in self.symbol_tables_:
            symtab.enter_scope()

        block = ast.FeatureBlock(location, tag)
        self.doc_.statements.append(block)

        while self.next_token_ != "}":
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.GLYPHCLASS:
                block.statements.append(self.parse_glyphclass_definition_())
            else:
                raise ParserError("Expected glyph class definition",
                                  self.cur_token_location_)

        self.expect_symbol_("}")
        for symtab in self.symbol_tables_:
            symtab.exit_scope()

        endtag = self.expect_tag_()
        if tag != endtag:
            raise ParserError("Expected \"%s\"" % tag.strip(),
                              self.cur_token_location_)
        self.expect_symbol_(";")

    def is_cur_keyword_(self, k):
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

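    # Tags are at most four characters; shorter tags are padded with trailing
    # spaces to four characters, matching the OpenType convention.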
    def expect_tag_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NAME:
            raise ParserError("Expected a tag", self.cur_token_location_)
        if len(self.cur_token_) > 4:
            raise ParserError("Tags can not be longer than 4 characters",
                              self.cur_token_location_)
        return (self.cur_token_ + "    ")[:4]

    def expect_symbol_(self, symbol):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol:
            return symbol
        raise ParserError("Expected '%s'" % symbol, self.cur_token_location_)

    def expect_name_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise ParserError("Expected a name", self.cur_token_location_)

    def advance_lexer_(self):
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)

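    # Expands a glyph range. After the common prefix and suffix are stripped,
    # the start and end must differ in a single uppercase letter, a single
    # lowercase letter, or a run of one to three digits.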
    def make_glyph_range_(self, location, start, limit):
        """("a.sc", "d.sc") --> {"a.sc", "b.sc", "c.sc", "d.sc"}"""
        result = set()
        if len(start) != len(limit):
            raise ParserError(
                "Bad range: \"%s\" and \"%s\" should have the same length" %
                (start, limit), location)
        rev = lambda s: ''.join(reversed(list(s)))  # string reversal
        prefix = os.path.commonprefix([start, limit])
        suffix = rev(os.path.commonprefix([rev(start), rev(limit)]))
        if len(suffix) > 0:
            start_range = start[len(prefix):-len(suffix)]
            limit_range = limit[len(prefix):-len(suffix)]
        else:
            start_range = start[len(prefix):]
            limit_range = limit[len(prefix):]

        if start_range >= limit_range:
            raise ParserError("Start of range must be smaller than its end",
                              location)

        uppercase = re.compile(r'^[A-Z]$')
        if uppercase.match(start_range) and uppercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.add("%s%c%s" % (prefix, c, suffix))
            return result

        lowercase = re.compile(r'^[a-z]$')
        if lowercase.match(start_range) and lowercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.add("%s%c%s" % (prefix, c, suffix))
            return result

        digits = re.compile(r'^[0-9]{1,3}$')
        if digits.match(start_range) and digits.match(limit_range):
            for i in range(int(start_range, 10), int(limit_range, 10) + 1):
                number = ("000" + str(i))[-len(start_range):]
                result.add("%s%s%s" % (prefix, number, suffix))
            return result

        raise ParserError("Bad range: \"%s-%s\"" % (start, limit), location)


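# SymbolTable is a simple scoped name -> value mapping; resolve() searches
# scopes from innermost to outermost.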
class SymbolTable(object):
    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        self.scopes_.append({})

    def exit_scope(self):
        self.scopes_.pop()

    def define(self, name, item):
        self.scopes_[-1][name] = item

    def resolve(self, name):
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            if item:
                return item
        return None
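

# A minimal usage sketch (illustrative only, not part of the module). It
# assumes a feature file "example.fea" exists; how each statement prints
# depends on the ast classes.
#
#     from fontTools.feaLib.parser import Parser
#     doc = Parser("example.fea").parse()
#     for statement in doc.statements:
#         print(statement)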