[feaLib] Implement top-level glyph class definitions

This commit is contained in:
Sascha Brawer 2015-08-01 17:34:02 +02:00
parent b86e30d7c6
commit ba238344b1
5 changed files with 179 additions and 15 deletions

View File

@ -6,7 +6,7 @@ def write(buffer, text):
buffer.write(text.encode("utf-8"))
class FeatureFile:
class FeatureFile(object):
def __init__(self):
self.statements = []
@ -15,7 +15,18 @@ class FeatureFile:
s.write(out, linesep)
class LanguageSystemStatement:
class GlyphClassDefinition(object):
    """AST node for a named glyph class definition, ``@name = [...];``."""

    def __init__(self, location, name, glyphs):
        # location is stored as given; nothing in this class reads it back.
        self.location, self.name, self.glyphs = location, name, glyphs

    def write(self, out, linesep):
        """Emit the definition to ``out`` with its glyphs in sorted order."""
        members = sorted(self.glyphs)
        statement = "@%s = [%s];%s" % (self.name, " ".join(members), linesep)
        write(out, statement)
class LanguageSystemStatement(object):
def __init__(self, location, script, language):
self.location = location
self.script, self.language = (script, language)

View File

@ -23,6 +23,7 @@ class Lexer(object):
STRING = "STRING"
NAME = "NAME"
FILENAME = "FILENAME"
GLYPHCLASS = "GLYPHCLASS"
CID = "CID"
SYMBOL = "SYMBOL"
COMMENT = "COMMENT"
@ -30,7 +31,7 @@ class Lexer(object):
CHAR_WHITESPACE_ = " \t"
CHAR_NEWLINE_ = "\r\n"
CHAR_SYMBOL_ = ";:@-+'{}[]<>()"
CHAR_SYMBOL_ = ";:-+'{}[]<>()="
CHAR_DIGIT_ = "0123456789"
CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\"
@ -101,6 +102,13 @@ class Lexer(object):
self.pos_ += 1
self.scan_over_(Lexer.CHAR_DIGIT_)
return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
if cur_char == "@":
self.pos_ += 1
self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
glyphclass = text[start + 1:self.pos_]
if len(glyphclass) < 1:
raise LexerError("Expected glyph class name", location)
return (Lexer.GLYPHCLASS, glyphclass, location)
if cur_char in Lexer.CHAR_NAME_START_:
self.pos_ += 1
self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)

View File

@ -20,6 +20,13 @@ class LexerErrorTest(unittest.TestCase):
class LexerTest(unittest.TestCase):
def __init__(self, methodName):
    """Set up the test case and alias assertRaisesRegex where missing."""
    unittest.TestCase.__init__(self, methodName)
    # Python 3 renamed assertRaisesRegexp to assertRaisesRegex,
    # and fires deprecation warnings if a program uses the old name.
    needs_alias = not hasattr(self, "assertRaisesRegex")
    if needs_alias:
        self.assertRaisesRegex = self.assertRaisesRegexp
def test_empty(self):
self.assertEqual(lex(""), [])
self.assertEqual(lex(" \t "), [])
@ -34,6 +41,11 @@ class LexerTest(unittest.TestCase):
def test_cid(self):
    """Backslash-prefixed digit runs lex as CID tokens with int values."""
    expected = [(Lexer.CID, 0), (Lexer.CID, 987)]
    self.assertEqual(lex("\\0 \\987"), expected)
def test_glyphclass(self):
    """'@name' lexes as GLYPHCLASS; '@' without a name is an error."""
    tokens = lex("@Vowel.sc")
    self.assertEqual(tokens, [(Lexer.GLYPHCLASS, "Vowel.sc")])
    for bad_input in ("@(a)", "@ A"):
        self.assertRaisesRegex(
            LexerError, "Expected glyph class", lex, bad_input)
def test_include(self):
self.assertEqual(lex("include (~/foo/bar baz.fea);"), [
(Lexer.NAME, "include"),
@ -81,7 +93,7 @@ class LexerTest(unittest.TestCase):
for (_, _, loc) in Lexer(s, "test.fea")]
self.assertEqual(locs("a b # Comment\n12 @x"), [
"test.fea:1:1", "test.fea:1:3", "test.fea:2:1",
"test.fea:2:4", "test.fea:2:5"
"test.fea:2:4"
])
def test_scan_over_(self):

View File

@ -1,8 +1,9 @@
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.feaLib.lexer import Lexer, IncludingLexer
import fontTools.feaLib.ast as ast
import os
import re
class ParserError(Exception):
@ -30,13 +31,46 @@ class Parser(object):
def parse(self):
while self.next_token_type_ is not None:
keyword = self.expect_keyword_({"feature", "languagesystem"})
if keyword == "languagesystem":
self.advance_lexer_()
if self.cur_token_type_ is Lexer.GLYPHCLASS:
self.parse_glyphclass_definition_()
elif self.is_cur_keyword_("languagesystem"):
self.parse_languagesystem_()
elif keyword == "feature":
elif self.is_cur_keyword_("feature"):
break # TODO: Implement
else:
raise ParserError("Expected languagesystem, feature, or "
"glyph class definition",
self.cur_token_location_)
return self.doc_
def parse_glyphclass_definition_(self):
    """Parse ``@name = [...];`` and append it to the document.

    The current token supplies the class name and the location that are
    recorded on the resulting GlyphClassDefinition node.
    """
    name = self.cur_token_
    location = self.cur_token_location_
    self.expect_symbol_("=")
    members = self.parse_glyphclass_reference_()
    self.expect_symbol_(";")
    self.doc_.statements.append(
        ast.GlyphClassDefinition(location, name, members))
def parse_glyphclass_reference_(self):
    """Parse a bracketed glyph list and return the glyph names as a set.

    Accepts single glyph names and ``start-end`` ranges between ``[`` and
    ``]``; ranges are expanded with make_glyph_range_.

    Returns:
        A set of glyph name strings (empty for ``[]``).

    Raises:
        ParserError: if a bracket is missing, a range is malformed, or
            anything other than a glyph name appears inside the brackets
            (including end of input before the closing bracket).
    """
    result = set()
    self.expect_symbol_("[")
    while self.next_token_ != "]":
        if self.next_token_type_ is not Lexer.NAME:
            # Only NAME tokens are consumed in this loop. Without this
            # check, any other token (end of input, "@class", a stray
            # symbol) would leave the lexer position unchanged and the
            # loop would never terminate.
            self.advance_lexer_()
            raise ParserError("Expected glyph name or glyph range",
                              self.cur_token_location_)
        self.advance_lexer_()
        if self.next_token_ == "-":
            # "start-end": remember the start before consuming the dash.
            range_location_ = self.cur_token_location_
            range_start = self.cur_token_
            self.expect_symbol_("-")
            range_end = self.expect_name_()
            result.update(self.make_glyph_range_(range_location_,
                                                 range_start,
                                                 range_end))
        else:
            result.add(self.cur_token_)
    self.expect_symbol_("]")
    return result
def parse_languagesystem_(self):
location = self.cur_token_location_
script, language = self.expect_tag_(), self.expect_tag_()
@ -44,13 +78,8 @@ class Parser(object):
langsys = ast.LanguageSystemStatement(location, script, language)
self.doc_.statements.append(langsys)
def expect_keyword_(self, keywords):
self.advance_lexer_()
if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in keywords:
return self.cur_token_
s = ", ".join(sorted(list(keywords)))
raise ParserError("Expected one of %s" % s,
self.cur_token_location_)
def is_cur_keyword_(self, k):
    """Tell whether the current token is a NAME token equal to ``k``."""
    if self.cur_token_type_ is not Lexer.NAME:
        return False
    return self.cur_token_ == k
def expect_tag_(self):
self.advance_lexer_()
@ -67,6 +96,12 @@ class Parser(object):
return symbol
raise ParserError("Expected '%s'" % symbol, self.cur_token_location_)
def expect_name_(self):
    """Advance to the next token and return it; it must be a NAME.

    Raises ParserError at the new current token's location otherwise.
    """
    self.advance_lexer_()
    if self.cur_token_type_ is not Lexer.NAME:
        raise ParserError("Expected a name", self.cur_token_location_)
    return self.cur_token_
def advance_lexer_(self):
self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
self.next_token_type_, self.next_token_, self.next_token_location_)
@ -75,3 +110,45 @@ class Parser(object):
self.next_token_location_) = self.lexer_.next()
except StopIteration:
self.next_token_type_, self.next_token_ = (None, None)
def make_glyph_range_(self, location, start, limit):
    """Expand a glyph range into the set of glyph names it covers.

    ("a.sc", "d.sc") --> {"a.sc", "b.sc", "c.sc", "d.sc"}

    Both names must have the same length. Their common prefix and common
    suffix are set aside; the differing middle parts must both be a single
    uppercase letter, both a single lowercase letter, or both one to three
    decimal digits. Both endpoints are included in the result.

    Raises ParserError (carrying ``location``) when the lengths differ,
    when the start does not sort before the end, or when the differing
    parts fit none of the supported forms.
    """
    if len(start) != len(limit):
        raise ParserError(
            "Bad range: \"%s\" and \"%s\" should have the same length" %
            (start, limit), location)

    prefix = os.path.commonprefix([start, limit])
    # The common suffix is the common prefix of the reversed names.
    suffix = os.path.commonprefix([start[::-1], limit[::-1]])[::-1]

    # Slice out whatever lies between the shared prefix and suffix.
    head, tail = len(prefix), len(start) - len(suffix)
    first, last = start[head:tail], limit[head:tail]

    if first >= last:
        raise ParserError("Start of range must be smaller than its end",
                          location)

    # Single letters: both ends must be in the same case.
    for letter_pattern in (r'^[A-Z]$', r'^[a-z]$'):
        if re.match(letter_pattern, first) and re.match(letter_pattern, last):
            return {"%s%c%s" % (prefix, code, suffix)
                    for code in range(ord(first), ord(last) + 1)}

    # Up to three digits: keep the zero padding of the endpoints.
    number_pattern = r'^[0-9]{1,3}$'
    if re.match(number_pattern, first) and re.match(number_pattern, last):
        width = len(first)
        return {"%s%s%s" % (prefix, str(value).zfill(width), suffix)
                for value in range(int(first, 10), int(last, 10) + 1)}

    raise ParserError("Bad range: \"%s-%s\"" % (start, limit), location)

View File

@ -19,6 +19,62 @@ class ParserTest(unittest.TestCase):
if not hasattr(self, "assertRaisesRegex"):
self.assertRaisesRegex = self.assertRaisesRegexp
def test_glyphclass(self):
    """A plain list of glyph names becomes one named glyph class."""
    doc = self.parse("@dash = [endash emdash figuredash];")
    [definition] = doc.statements
    self.assertEqual(definition.name, "dash")
    self.assertEqual(definition.glyphs, {"endash", "emdash", "figuredash"})
def test_glyphclass_range_uppercase(self):
    """A range over a single uppercase letter expands inclusively."""
    doc = self.parse("@swashes = [X.swash-Z.swash];")
    [definition] = doc.statements
    self.assertEqual(definition.name, "swashes")
    self.assertEqual(definition.glyphs, {"X.swash", "Y.swash", "Z.swash"})
def test_glyphclass_range_lowercase(self):
    """A range over a single lowercase letter expands inclusively."""
    doc = self.parse("@defg.sc = [d.sc-g.sc];")
    [definition] = doc.statements
    self.assertEqual(definition.name, "defg.sc")
    self.assertEqual(definition.glyphs, {"d.sc", "e.sc", "f.sc", "g.sc"})
def test_glyphclass_range_digit1(self):
    """A one-digit numeric range expands inclusively."""
    doc = self.parse("@range = [foo.2-foo.5];")
    [definition] = doc.statements
    expected = {"foo.2", "foo.3", "foo.4", "foo.5"}
    self.assertEqual(definition.glyphs, expected)
def test_glyphclass_range_digit2(self):
    """A two-digit numeric range keeps its leading zero padding."""
    doc = self.parse("@range = [foo.09-foo.11];")
    [definition] = doc.statements
    expected = {"foo.09", "foo.10", "foo.11"}
    self.assertEqual(definition.glyphs, expected)
def test_glyphclass_range_digit3(self):
    """A three-digit numeric range expands inclusively."""
    doc = self.parse("@range = [foo.123-foo.125];")
    [definition] = doc.statements
    expected = {"foo.123", "foo.124", "foo.125"}
    self.assertEqual(definition.glyphs, expected)
def test_glyphclass_range_bad(self):
    """Malformed glyph ranges raise ParserError with a matching message."""
    # (expected message pattern, feature source), checked in this order.
    bad_cases = [
        ("Bad range: \"a\" and \"foobar\" should have the same length",
         "@bad = [a-foobar];"),
        ("Bad range: \"A.swash-z.swash\"",
         "@bad = [A.swash-z.swash];"),
        ("Start of range must be smaller than its end",
         "@bad = [B.swash-A.swash];"),
        ("Bad range: \"foo.1234-foo.9876\"",
         "@bad = [foo.1234-foo.9876];"),
    ]
    for message, source in bad_cases:
        self.assertRaisesRegex(ParserError, message, self.parse, source)
def test_glyphclass_range_mixed(self):
    """Single names and ranges can be combined in one glyph class."""
    doc = self.parse("@range = [a foo.09-foo.11 X.sc-Z.sc];")
    [definition] = doc.statements
    expected = {
        "a", "foo.09", "foo.10", "foo.11", "X.sc", "Y.sc", "Z.sc"
    }
    self.assertEqual(definition.glyphs, expected)
    # TODO: self.parse("@foo = [a b]; @bar = [@foo];")
    # TODO: self.parse("@foo = [a b]; @bar = @foo;")
def test_glyphclass_empty(self):
    """An empty bracket pair yields a glyph class with no members."""
    doc = self.parse("@empty_set = [];")
    [definition] = doc.statements
    self.assertEqual(definition.name, "empty_set")
    self.assertEqual(definition.glyphs, set())
def test_languagesystem(self):
[langsys] = self.parse("languagesystem latn DEU;").statements
self.assertEqual(langsys.script, "latn")