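# Parser for OpenType feature (.fea) files. This module handles languagesystem
# statements, glyph class definitions, and feature blocks whose bodies consist
# of glyph class definitions; see Parser.parse() below for the dispatch.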
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.feaLib.lexer import Lexer, IncludingLexer
import fontTools.feaLib.ast as ast
import os
import re


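# ParserError carries an optional (path, line, column) tuple so that error
# messages can point at the offending position in the feature file.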
class ParserError(Exception):
    def __init__(self, message, location):
        Exception.__init__(self, message)
        self.location = location

    def __str__(self):
        message = Exception.__str__(self)
        if self.location:
            path, line, column = self.location
            return "%s:%d:%d: %s" % (path, line, column, message)
        else:
            return message


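# Parser keeps one token of lookahead (next_token_*); advance_lexer_() shifts
# it into cur_token_* so the parse methods can peek before consuming.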
class Parser(object):
    def __init__(self, path):
        self.doc_ = ast.FeatureFile()
        self.glyphclasses_ = SymbolTable()
        self.valuerecords_ = SymbolTable()
        self.symbol_tables_ = [self.glyphclasses_, self.valuerecords_]
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.lexer_ = IncludingLexer(path)
        self.advance_lexer_()

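    # parse() is the entry point: it walks the token stream and appends the
    # resulting statements to the ast.FeatureFile document.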
    def parse(self):
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.GLYPHCLASS:
                glyphclass = self.parse_glyphclass_definition_()
                self.doc_.statements.append(glyphclass)
            elif self.is_cur_keyword_("languagesystem"):
                self.parse_languagesystem_()
            elif self.is_cur_keyword_("feature"):
                self.parse_feature_block_()
            else:
                raise ParserError("Expected languagesystem, feature, or "
                                  "glyph class definition",
                                  self.cur_token_location_)
        return self.doc_

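    # Parses a glyph class definition of the form "@NAME = <glyph class>;".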
    def parse_glyphclass_definition_(self):
        location, name = self.cur_token_location_, self.cur_token_
        self.expect_symbol_("=")
        glyphs = self.parse_glyphclass_reference_()
        self.expect_symbol_(";")
        if self.glyphclasses_.resolve(name) is not None:
            raise ParserError("Glyph class @%s already defined" % name,
                              location)
        glyphclass = ast.GlyphClassDefinition(location, name, glyphs)
        self.glyphclasses_.define(name, glyphclass)
        return glyphclass

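    # Parses either a reference to a named class ("@NAME") or a bracketed
    # list such as "[a b c]", which may mix glyph names, ranges ("a - d"),
    # and nested class references.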
    def parse_glyphclass_reference_(self):
        result = set()
        if self.next_token_type_ is Lexer.GLYPHCLASS:
            self.advance_lexer_()
            gc = self.glyphclasses_.resolve(self.cur_token_)
            if gc is None:
                raise ParserError("Unknown glyph class @%s" % self.cur_token_,
                                  self.cur_token_location_)
            result.update(gc.glyphs)
            return result

        self.expect_symbol_("[")
        while self.next_token_ != "]":
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.NAME:
                if self.next_token_ == "-":
                    range_location_ = self.cur_token_location_
                    range_start = self.cur_token_
                    self.expect_symbol_("-")
                    range_end = self.expect_name_()
                    result.update(self.make_glyph_range_(range_location_,
                                                         range_start,
                                                         range_end))
                else:
                    result.add(self.cur_token_)
            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                gc = self.glyphclasses_.resolve(self.cur_token_)
                if gc is None:
                    raise ParserError(
                        "Unknown glyph class @%s" % self.cur_token_,
                        self.cur_token_location_)
                result.update(gc.glyphs)
            else:
                raise ParserError("Expected glyph name, range, or reference",
                                  self.cur_token_location_)
        self.expect_symbol_("]")
        return result

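    # Parses "languagesystem <script> <language>;".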
    def parse_languagesystem_(self):
        assert self.cur_token_ == "languagesystem"
        location = self.cur_token_location_
        script, language = self.expect_tag_(), self.expect_tag_()
        self.expect_symbol_(";")
        langsys = ast.LanguageSystemStatement(location, script, language)
        self.doc_.statements.append(langsys)

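    # Parses "feature <tag> { ... } <tag>;". Glyph classes defined inside the
    # block live in a nested scope that is discarded at the closing brace.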
    def parse_feature_block_(self):
        assert self.cur_token_ == "feature"
        location = self.cur_token_location_
        tag = self.expect_tag_()

        self.expect_symbol_("{")
        for symtab in self.symbol_tables_:
            symtab.enter_scope()

        block = ast.FeatureBlock(location, tag)
        self.doc_.statements.append(block)

        while self.next_token_ != "}":
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.GLYPHCLASS:
                block.statements.append(self.parse_glyphclass_definition_())
            else:
                raise ParserError("Expected glyph class definition",
                                  self.cur_token_location_)

        self.expect_symbol_("}")
        for symtab in self.symbol_tables_:
            symtab.exit_scope()

        endtag = self.expect_tag_()
        if tag != endtag:
            raise ParserError("Expected \"%s\"" % tag.strip(),
                              self.cur_token_location_)
        self.expect_symbol_(";")

    def is_cur_keyword_(self, k):
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

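    # Tags are at most four characters; shorter tags are padded with trailing
    # spaces to four characters, matching the OpenType convention.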
    def expect_tag_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NAME:
            raise ParserError("Expected a tag", self.cur_token_location_)
        if len(self.cur_token_) > 4:
            raise ParserError("Tags can not be longer than 4 characters",
                              self.cur_token_location_)
        return (self.cur_token_ + "    ")[:4]

    def expect_symbol_(self, symbol):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol:
            return symbol
        raise ParserError("Expected '%s'" % symbol, self.cur_token_location_)

    def expect_name_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise ParserError("Expected a name", self.cur_token_location_)

    def advance_lexer_(self):
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
        try:
            (self.next_token_type_, self.next_token_,
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)

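    # Expands a glyph range. After the common prefix and suffix are stripped,
    # the start and end must differ in a single uppercase letter, a single
    # lowercase letter, or a run of one to three digits.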
    def make_glyph_range_(self, location, start, limit):
        """("a.sc", "d.sc") --> {"a.sc", "b.sc", "c.sc", "d.sc"}"""
        result = set()
        if len(start) != len(limit):
            raise ParserError(
                "Bad range: \"%s\" and \"%s\" should have the same length" %
                (start, limit), location)
        rev = lambda s: ''.join(reversed(list(s)))  # string reversal
        prefix = os.path.commonprefix([start, limit])
        suffix = rev(os.path.commonprefix([rev(start), rev(limit)]))
        if len(suffix) > 0:
            start_range = start[len(prefix):-len(suffix)]
            limit_range = limit[len(prefix):-len(suffix)]
        else:
            start_range = start[len(prefix):]
            limit_range = limit[len(prefix):]

        if start_range >= limit_range:
            raise ParserError("Start of range must be smaller than its end",
                              location)

        uppercase = re.compile(r'^[A-Z]$')
        if uppercase.match(start_range) and uppercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.add("%s%c%s" % (prefix, c, suffix))
            return result

        lowercase = re.compile(r'^[a-z]$')
        if lowercase.match(start_range) and lowercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.add("%s%c%s" % (prefix, c, suffix))
            return result

        digits = re.compile(r'^[0-9]{1,3}$')
        if digits.match(start_range) and digits.match(limit_range):
            for i in range(int(start_range, 10), int(limit_range, 10) + 1):
                number = ("000" + str(i))[-len(start_range):]
                result.add("%s%s%s" % (prefix, number, suffix))
            return result

        raise ParserError("Bad range: \"%s-%s\"" % (start, limit), location)


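# SymbolTable is a simple scoped name -> value mapping; resolve() searches
# scopes from innermost to outermost.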
class SymbolTable(object):
    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        self.scopes_.append({})

    def exit_scope(self):
        self.scopes_.pop()

    def define(self, name, item):
        self.scopes_[-1][name] = item

    def resolve(self, name):
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            if item:
                return item
        return None
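

# A minimal usage sketch (illustrative only, not part of the module). It
# assumes a feature file "example.fea" exists; how each statement prints
# depends on the ast classes.
#
#     from fontTools.feaLib.parser import Parser
#     doc = Parser("example.fea").parse()
#     for statement in doc.statements:
#         print(statement)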