From ba238344b1ca23962bf18b2e068fd4c1930dbb15 Mon Sep 17 00:00:00 2001 From: Sascha Brawer Date: Sat, 1 Aug 2015 17:34:02 +0200 Subject: [PATCH] [feaLib] Implement top-level glyph class definitions --- Lib/fontTools/feaLib/ast.py | 15 ++++- Lib/fontTools/feaLib/lexer.py | 10 ++- Lib/fontTools/feaLib/lexer_test.py | 14 +++- Lib/fontTools/feaLib/parser.py | 99 +++++++++++++++++++++++++---- Lib/fontTools/feaLib/parser_test.py | 56 ++++++++++++++++ 5 files changed, 179 insertions(+), 15 deletions(-) diff --git a/Lib/fontTools/feaLib/ast.py b/Lib/fontTools/feaLib/ast.py index 59a9072d9..7c268119d 100644 --- a/Lib/fontTools/feaLib/ast.py +++ b/Lib/fontTools/feaLib/ast.py @@ -6,7 +6,7 @@ def write(buffer, text): buffer.write(text.encode("utf-8")) -class FeatureFile: +class FeatureFile(object): def __init__(self): self.statements = [] @@ -15,7 +15,18 @@ class FeatureFile: s.write(out, linesep) -class LanguageSystemStatement: +class GlyphClassDefinition(object): + def __init__(self, location, name, glyphs): + self.location = location + self.name = name + self.glyphs = glyphs + + def write(self, out, linesep): + glyphs = " ".join(sorted(self.glyphs)) + write(out, "@%s = [%s];%s" % (self.name, glyphs, linesep)) + + +class LanguageSystemStatement(object): def __init__(self, location, script, language): self.location = location self.script, self.language = (script, language) diff --git a/Lib/fontTools/feaLib/lexer.py b/Lib/fontTools/feaLib/lexer.py index b544378eb..7d4c29fa6 100644 --- a/Lib/fontTools/feaLib/lexer.py +++ b/Lib/fontTools/feaLib/lexer.py @@ -23,6 +23,7 @@ class Lexer(object): STRING = "STRING" NAME = "NAME" FILENAME = "FILENAME" + GLYPHCLASS = "GLYPHCLASS" CID = "CID" SYMBOL = "SYMBOL" COMMENT = "COMMENT" @@ -30,7 +31,7 @@ class Lexer(object): CHAR_WHITESPACE_ = " \t" CHAR_NEWLINE_ = "\r\n" - CHAR_SYMBOL_ = ";:@-+'{}[]<>()" + CHAR_SYMBOL_ = ";:-+'{}[]<>()=" CHAR_DIGIT_ = "0123456789" CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\" @@ -101,6 +102,13 @@ class Lexer(object): self.pos_ += 1 self.scan_over_(Lexer.CHAR_DIGIT_) return (Lexer.CID, int(text[start + 1:self.pos_], 10), location) + if cur_char == "@": + self.pos_ += 1 + self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_) + glyphclass = text[start + 1:self.pos_] + if len(glyphclass) < 1: + raise LexerError("Expected glyph class name", location) + return (Lexer.GLYPHCLASS, glyphclass, location) if cur_char in Lexer.CHAR_NAME_START_: self.pos_ += 1 self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_) diff --git a/Lib/fontTools/feaLib/lexer_test.py b/Lib/fontTools/feaLib/lexer_test.py index 6fa3d42f0..92da8c588 100644 --- a/Lib/fontTools/feaLib/lexer_test.py +++ b/Lib/fontTools/feaLib/lexer_test.py @@ -20,6 +20,13 @@ class LexerErrorTest(unittest.TestCase): class LexerTest(unittest.TestCase): + def __init__(self, methodName): + unittest.TestCase.__init__(self, methodName) + # Python 3 renamed assertRaisesRegexp to assertRaisesRegex, + # and fires deprecation warnings if a program uses the old name. + if not hasattr(self, "assertRaisesRegex"): + self.assertRaisesRegex = self.assertRaisesRegexp + def test_empty(self): self.assertEqual(lex(""), []) self.assertEqual(lex(" \t "), []) @@ -34,6 +41,11 @@ class LexerTest(unittest.TestCase): def test_cid(self): self.assertEqual(lex("\\0 \\987"), [(Lexer.CID, 0), (Lexer.CID, 987)]) + def test_glyphclass(self): + self.assertEqual(lex("@Vowel.sc"), [(Lexer.GLYPHCLASS, "Vowel.sc")]) + self.assertRaisesRegex(LexerError, "Expected glyph class", lex, "@(a)") + self.assertRaisesRegex(LexerError, "Expected glyph class", lex, "@ A") + def test_include(self): self.assertEqual(lex("include (~/foo/bar baz.fea);"), [ (Lexer.NAME, "include"), @@ -81,7 +93,7 @@ class LexerTest(unittest.TestCase): for (_, _, loc) in Lexer(s, "test.fea")] self.assertEqual(locs("a b # Comment\n12 @x"), [ "test.fea:1:1", "test.fea:1:3", "test.fea:2:1", - "test.fea:2:4", "test.fea:2:5" + "test.fea:2:4" ]) def test_scan_over_(self): diff --git a/Lib/fontTools/feaLib/parser.py b/Lib/fontTools/feaLib/parser.py index 9a930e4df..d5e7256e3 100644 --- a/Lib/fontTools/feaLib/parser.py +++ b/Lib/fontTools/feaLib/parser.py @@ -1,8 +1,9 @@ from __future__ import print_function, division, absolute_import from __future__ import unicode_literals from fontTools.feaLib.lexer import Lexer, IncludingLexer - import fontTools.feaLib.ast as ast +import os +import re class ParserError(Exception): @@ -30,13 +31,46 @@ class Parser(object): def parse(self): while self.next_token_type_ is not None: - keyword = self.expect_keyword_({"feature", "languagesystem"}) - if keyword == "languagesystem": + self.advance_lexer_() + if self.cur_token_type_ is Lexer.GLYPHCLASS: + self.parse_glyphclass_definition_() + elif self.is_cur_keyword_("languagesystem"): self.parse_languagesystem_() - elif keyword == "feature": + elif self.is_cur_keyword_("feature"): break # TODO: Implement + else: + raise ParserError("Expected languagesystem, feature, or " + "glyph class definition", + self.cur_token_location_) return self.doc_ + def parse_glyphclass_definition_(self): + location, name = self.cur_token_location_, self.cur_token_ + self.expect_symbol_("=") + glyphs = self.parse_glyphclass_reference_() + self.expect_symbol_(";") + glyphclass = ast.GlyphClassDefinition(location, name, glyphs) + self.doc_.statements.append(glyphclass) + + def parse_glyphclass_reference_(self): + result = set() + self.expect_symbol_("[") + while self.next_token_ != "]": + if self.next_token_type_ is Lexer.NAME: + self.advance_lexer_() + if self.next_token_ == "-": + range_location_ = self.cur_token_location_ + range_start = self.cur_token_ + self.expect_symbol_("-") + range_end = self.expect_name_() + result.update(self.make_glyph_range_(range_location_, + range_start, + range_end)) + else: + result.add(self.cur_token_) + self.expect_symbol_("]") + return result + def parse_languagesystem_(self): location = self.cur_token_location_ script, language = self.expect_tag_(), self.expect_tag_() @@ -44,13 +78,8 @@ class Parser(object): langsys = ast.LanguageSystemStatement(location, script, language) self.doc_.statements.append(langsys) - def expect_keyword_(self, keywords): - self.advance_lexer_() - if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in keywords: - return self.cur_token_ - s = ", ".join(sorted(list(keywords))) - raise ParserError("Expected one of %s" % s, - self.cur_token_location_) + def is_cur_keyword_(self, k): + return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k) def expect_tag_(self): self.advance_lexer_() @@ -67,6 +96,12 @@ class Parser(object): return symbol raise ParserError("Expected '%s'" % symbol, self.cur_token_location_) + def expect_name_(self): + self.advance_lexer_() + if self.cur_token_type_ is Lexer.NAME: + return self.cur_token_ + raise ParserError("Expected a name", self.cur_token_location_) + def advance_lexer_(self): self.cur_token_type_, self.cur_token_, self.cur_token_location_ = ( self.next_token_type_, self.next_token_, self.next_token_location_) @@ -75,3 +110,45 @@ class Parser(object): self.next_token_location_) = self.lexer_.next() except StopIteration: self.next_token_type_, self.next_token_ = (None, None) + + def make_glyph_range_(self, location, start, limit): + """("a.sc", "d.sc") --> {"a.sc", "b.sc", "c.sc", "d.sc"}""" + result = set() + if len(start) != len(limit): + raise ParserError( + "Bad range: \"%s\" and \"%s\" should have the same length" % + (start, limit), location) + rev = lambda s: ''.join(reversed(list(s))) # string reversal + prefix = os.path.commonprefix([start, limit]) + suffix = rev(os.path.commonprefix([rev(start), rev(limit)])) + if len(suffix) > 0: + start_range = start[len(prefix):-len(suffix)] + limit_range = limit[len(prefix):-len(suffix)] + else: + start_range = start[len(prefix):] + limit_range = limit[len(prefix):] + + if start_range >= limit_range: + raise ParserError("Start of range must be smaller than its end", + location) + + uppercase = re.compile(r'^[A-Z]$') + if uppercase.match(start_range) and uppercase.match(limit_range): + for c in range(ord(start_range), ord(limit_range) + 1): + result.add("%s%c%s" % (prefix, c, suffix)) + return result + + lowercase = re.compile(r'^[a-z]$') + if lowercase.match(start_range) and lowercase.match(limit_range): + for c in range(ord(start_range), ord(limit_range) + 1): + result.add("%s%c%s" % (prefix, c, suffix)) + return result + + digits = re.compile(r'^[0-9]{1,3}$') + if digits.match(start_range) and digits.match(limit_range): + for i in range(int(start_range, 10), int(limit_range, 10) + 1): + number = ("000" + str(i))[-len(start_range):] + result.add("%s%s%s" % (prefix, number, suffix)) + return result + + raise ParserError("Bad range: \"%s-%s\"" % (start, limit), location) diff --git a/Lib/fontTools/feaLib/parser_test.py b/Lib/fontTools/feaLib/parser_test.py index ea6f95801..572b98f71 100644 --- a/Lib/fontTools/feaLib/parser_test.py +++ b/Lib/fontTools/feaLib/parser_test.py @@ -19,6 +19,62 @@ class ParserTest(unittest.TestCase): if not hasattr(self, "assertRaisesRegex"): self.assertRaisesRegex = self.assertRaisesRegexp + def test_glyphclass(self): + [gc] = self.parse("@dash = [endash emdash figuredash];").statements + self.assertEqual(gc.name, "dash") + self.assertEqual(gc.glyphs, {"endash", "emdash", "figuredash"}) + + def test_glyphclass_range_uppercase(self): + [gc] = self.parse("@swashes = [X.swash-Z.swash];").statements + self.assertEqual(gc.name, "swashes") + self.assertEqual(gc.glyphs, {"X.swash", "Y.swash", "Z.swash"}) + + def test_glyphclass_range_lowercase(self): + [gc] = self.parse("@defg.sc = [d.sc-g.sc];").statements + self.assertEqual(gc.name, "defg.sc") + self.assertEqual(gc.glyphs, {"d.sc", "e.sc", "f.sc", "g.sc"}) + + def test_glyphclass_range_digit1(self): + [gc] = self.parse("@range = [foo.2-foo.5];").statements + self.assertEqual(gc.glyphs, {"foo.2", "foo.3", "foo.4", "foo.5"}) + + def test_glyphclass_range_digit2(self): + [gc] = self.parse("@range = [foo.09-foo.11];").statements + self.assertEqual(gc.glyphs, {"foo.09", "foo.10", "foo.11"}) + + def test_glyphclass_range_digit3(self): + [gc] = self.parse("@range = [foo.123-foo.125];").statements + self.assertEqual(gc.glyphs, {"foo.123", "foo.124", "foo.125"}) + + def test_glyphclass_range_bad(self): + self.assertRaisesRegex( + ParserError, + "Bad range: \"a\" and \"foobar\" should have the same length", + self.parse, "@bad = [a-foobar];") + self.assertRaisesRegex( + ParserError, "Bad range: \"A.swash-z.swash\"", + self.parse, "@bad = [A.swash-z.swash];") + self.assertRaisesRegex( + ParserError, "Start of range must be smaller than its end", + self.parse, "@bad = [B.swash-A.swash];") + self.assertRaisesRegex( + ParserError, "Bad range: \"foo.1234-foo.9876\"", + self.parse, "@bad = [foo.1234-foo.9876];") + + def test_glyphclass_range_mixed(self): + [gc] = self.parse("@range = [a foo.09-foo.11 X.sc-Z.sc];").statements + self.assertEqual(gc.glyphs, { + "a", "foo.09", "foo.10", "foo.11", "X.sc", "Y.sc", "Z.sc" + }) + + # TODO: self.parse("@foo = [a b]; @bar = [@foo];") + # TODO: self.parse("@foo = [a b]; @bar = @foo;") + + def test_glyphclass_empty(self): + [gc] = self.parse("@empty_set = [];").statements + self.assertEqual(gc.name, "empty_set") + self.assertEqual(gc.glyphs, set()) + def test_languagesystem(self): [langsys] = self.parse("languagesystem latn DEU;").statements self.assertEqual(langsys.script, "latn")