[feaLib] Implement top-level glyph class definitions

2015-08-01 17:34:02 +02:00 · 2015-08-01 17:34:02 +02:00 · ba238344b1
commit ba238344b1
parent b86e30d7c6
5 changed files with 179 additions and 15 deletions
--- a/Lib/fontTools/feaLib/ast.py
+++ b/Lib/fontTools/feaLib/ast.py
@ -6,7 +6,7 @@ def write(buffer, text):
    buffer.write(text.encode("utf-8"))


-class FeatureFile:
+class FeatureFile(object):
    def __init__(self):
        self.statements = []

@ -15,7 +15,18 @@ class FeatureFile:
            s.write(out, linesep)


-class LanguageSystemStatement:
+class GlyphClassDefinition(object):
+    """AST node for a glyph class definition such as ``@dash = [a b];``."""
+
+    def __init__(self, location, name, glyphs):
+        # location is stored as given by the caller; name is the class
+        # name without the leading "@"; glyphs is the collection of
+        # glyph names belonging to the class.
+        self.location = location
+        self.name, self.glyphs = name, glyphs
+
+    def write(self, out, linesep):
+        """Write the definition to out with glyph names sorted."""
+        fields = (self.name, " ".join(sorted(self.glyphs)), linesep)
+        write(out, "@%s = [%s];%s" % fields)
+
+
+class LanguageSystemStatement(object):
    def __init__(self, location, script, language):
        self.location = location
        self.script, self.language = (script, language)
--- a/Lib/fontTools/feaLib/lexer.py
+++ b/Lib/fontTools/feaLib/lexer.py
@ -23,6 +23,7 @@ class Lexer(object):
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
+    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
@ -30,7 +31,7 @@ class Lexer(object):

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
-    CHAR_SYMBOL_ = ";:@-+'{}[]<>()"
+    CHAR_SYMBOL_ = ";:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\"
@ -101,6 +102,13 @@ class Lexer(object):
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
+        if cur_char == "@":
+            self.pos_ += 1
+            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
+            glyphclass = text[start + 1:self.pos_]
+            if len(glyphclass) < 1:
+                raise LexerError("Expected glyph class name", location)
+            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
--- a/Lib/fontTools/feaLib/lexer_test.py
+++ b/Lib/fontTools/feaLib/lexer_test.py
@ -20,6 +20,13 @@ class LexerErrorTest(unittest.TestCase):


 class LexerTest(unittest.TestCase):
+    def __init__(self, methodName):
+        """Set up the test case and alias assertRaisesRegex if missing."""
+        unittest.TestCase.__init__(self, methodName)
+        # Python 3 renamed assertRaisesRegexp to assertRaisesRegex, and
+        # warns about the old name; when the new name already exists
+        # there is nothing to do.
+        if hasattr(self, "assertRaisesRegex"):
+            return
+        self.assertRaisesRegex = self.assertRaisesRegexp
+
    def test_empty(self):
        self.assertEqual(lex(""), [])
        self.assertEqual(lex(" \t "), [])
@ -34,6 +41,11 @@ class LexerTest(unittest.TestCase):
    def test_cid(self):
        self.assertEqual(lex("\\0 \\987"), [(Lexer.CID, 0), (Lexer.CID, 987)])

+    def test_glyphclass(self):
+        # "@" followed by name characters lexes as a single GLYPHCLASS
+        # token whose value omits the "@".
+        tokens = lex("@Vowel.sc")
+        self.assertEqual(tokens, [(Lexer.GLYPHCLASS, "Vowel.sc")])
+        # An "@" with no name characters directly after it is an error.
+        for text in ("@(a)", "@ A"):
+            self.assertRaisesRegex(
+                LexerError, "Expected glyph class", lex, text)
+
    def test_include(self):
        self.assertEqual(lex("include (~/foo/bar baz.fea);"), [
            (Lexer.NAME, "include"),
@ -81,7 +93,7 @@ class LexerTest(unittest.TestCase):
                          for (_, _, loc) in Lexer(s, "test.fea")]
        self.assertEqual(locs("a b # Comment\n12 @x"), [
            "test.fea:1:1", "test.fea:1:3", "test.fea:2:1",
-            "test.fea:2:4", "test.fea:2:5"
+            "test.fea:2:4"
        ])

    def test_scan_over_(self):
--- a/Lib/fontTools/feaLib/parser.py
+++ b/Lib/fontTools/feaLib/parser.py
@ -1,8 +1,9 @@
 from __future__ import print_function, division, absolute_import
 from __future__ import unicode_literals
 from fontTools.feaLib.lexer import Lexer, IncludingLexer
-
 import fontTools.feaLib.ast as ast
+import os
+import re


 class ParserError(Exception):
@ -30,13 +31,46 @@ class Parser(object):

    def parse(self):
        while self.next_token_type_ is not None:
-            keyword = self.expect_keyword_({"feature", "languagesystem"})
-            if keyword == "languagesystem":
+            self.advance_lexer_()
+            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+                self.parse_glyphclass_definition_()
+            elif self.is_cur_keyword_("languagesystem"):
                self.parse_languagesystem_()
-            elif keyword == "feature":
+            elif self.is_cur_keyword_("feature"):
                break  # TODO: Implement
+            else:
+                raise ParserError("Expected languagesystem, feature, or "
+                                  "glyph class definition",
+                                  self.cur_token_location_)
        return self.doc_

+    def parse_glyphclass_definition_(self):
+        """Parse ``@name = [glyphs];`` and append it to the document.
+
+        Called with the glyph class token as the current token; its value
+        and location become the definition's name and location.
+        """
+        name = self.cur_token_
+        location = self.cur_token_location_
+        self.expect_symbol_("=")
+        glyphs = self.parse_glyphclass_reference_()
+        self.expect_symbol_(";")
+        self.doc_.statements.append(
+            ast.GlyphClassDefinition(location, name, glyphs))
+
+    def parse_glyphclass_reference_(self):
+        """Parse a bracketed glyph list such as ``[a foo.09-foo.11 X-Z]``.
+
+        Consumes everything from the opening "[" through the closing "]".
+        A lone name adds that glyph; ``start-end`` is expanded with
+        make_glyph_range_.
+
+        Returns:
+            The set of glyph names listed between the brackets.
+
+        Raises:
+            ParserError: if a bracket is missing, or if anything other
+                than a glyph name or range appears before the closing
+                "]" (including the end of the input).
+        """
+        result = set()
+        self.expect_symbol_("[")
+        while self.next_token_ != "]":
+            if self.next_token_type_ is not Lexer.NAME:
+                # Only NAME tokens are consumed below.  Without this check
+                # any other token, or running out of input, would leave
+                # the loop spinning forever on the same lookahead token.
+                raise ParserError("Expected glyph name or glyph range",
+                                  self.next_token_location_)
+            self.advance_lexer_()
+            if self.next_token_ == "-":
+                range_location_ = self.cur_token_location_
+                range_start = self.cur_token_
+                self.expect_symbol_("-")
+                range_end = self.expect_name_()
+                result.update(self.make_glyph_range_(range_location_,
+                                                     range_start,
+                                                     range_end))
+            else:
+                result.add(self.cur_token_)
+        self.expect_symbol_("]")
+        return result
+
    def parse_languagesystem_(self):
        location = self.cur_token_location_
        script, language = self.expect_tag_(), self.expect_tag_()
@ -44,13 +78,8 @@ class Parser(object):
        langsys = ast.LanguageSystemStatement(location, script, language)
        self.doc_.statements.append(langsys)

-    def expect_keyword_(self, keywords):
-        self.advance_lexer_()
-        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in keywords:
-            return self.cur_token_
-        s = ", ".join(sorted(list(keywords)))
-        raise ParserError("Expected one of %s" % s,
-                          self.cur_token_location_)
+    def is_cur_keyword_(self, k):
+        """Tell whether the current token is a NAME token equal to k."""
+        if self.cur_token_type_ is not Lexer.NAME:
+            return False
+        return self.cur_token_ == k

    def expect_tag_(self):
        self.advance_lexer_()
@ -67,6 +96,12 @@ class Parser(object):
            return symbol
        raise ParserError("Expected '%s'" % symbol, self.cur_token_location_)

+    def expect_name_(self):
+        """Advance one token and return it, requiring it to be a NAME.
+
+        Raises ParserError at the new current token's location otherwise.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is not Lexer.NAME:
+            raise ParserError("Expected a name", self.cur_token_location_)
+        return self.cur_token_
+
    def advance_lexer_(self):
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
@ -75,3 +110,45 @@ class Parser(object):
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)
+
+    def make_glyph_range_(self, location, start, limit):
+        """Expand a glyph name range into the set of names it covers.
+
+        ("a.sc", "d.sc") --> {"a.sc", "b.sc", "c.sc", "d.sc"}
+
+        start and limit must have the same length.  Once their common
+        prefix and common suffix are removed, the remaining parts must
+        both be one uppercase letter, both be one lowercase letter, or
+        both be one to three decimal digits, and the start part must
+        compare smaller than the limit part.  Generated numbers are
+        zero-padded to the width of the varying part.  Anything else
+        raises ParserError at the given location.
+        """
+        if len(start) != len(limit):
+            raise ParserError(
+                "Bad range: \"%s\" and \"%s\" should have the same length" %
+                (start, limit), location)
+
+        prefix = os.path.commonprefix([start, limit])
+        # The common suffix is the common prefix of the reversed names.
+        suffix = os.path.commonprefix([start[::-1], limit[::-1]])[::-1]
+        stop = len(start) - len(suffix)
+        first, last = start[len(prefix):stop], limit[len(prefix):stop]
+
+        if first >= last:
+            raise ParserError("Start of range must be smaller than its end",
+                              location)
+
+        def both_match(pattern):
+            return re.match(pattern, first) and re.match(pattern, last)
+
+        if both_match(r'^[A-Z]$') or both_match(r'^[a-z]$'):
+            return set("%s%c%s" % (prefix, code, suffix)
+                       for code in range(ord(first), ord(last) + 1))
+
+        if both_match(r'^[0-9]{1,3}$'):
+            width = len(first)
+            return set("%s%s%s" % (prefix, str(n).zfill(width), suffix)
+                       for n in range(int(first, 10), int(last, 10) + 1))
+
+        raise ParserError("Bad range: \"%s-%s\"" % (start, limit), location)
--- a/Lib/fontTools/feaLib/parser_test.py
+++ b/Lib/fontTools/feaLib/parser_test.py
@ -19,6 +19,62 @@ class ParserTest(unittest.TestCase):
        if not hasattr(self, "assertRaisesRegex"):
            self.assertRaisesRegex = self.assertRaisesRegexp

+    def test_glyphclass(self):
+        # A plain list of names produces one definition holding them all.
+        doc = self.parse("@dash = [endash emdash figuredash];")
+        [gc] = doc.statements
+        self.assertEqual(gc.name, "dash")
+        expected = {"endash", "emdash", "figuredash"}
+        self.assertEqual(gc.glyphs, expected)
+
+    def test_glyphclass_range_uppercase(self):
+        # A range over one uppercase letter includes both endpoints.
+        doc = self.parse("@swashes = [X.swash-Z.swash];")
+        [gc] = doc.statements
+        self.assertEqual(gc.name, "swashes")
+        expected = {"X.swash", "Y.swash", "Z.swash"}
+        self.assertEqual(gc.glyphs, expected)
+
+    def test_glyphclass_range_lowercase(self):
+        # A range over one lowercase letter includes both endpoints; the
+        # class name itself may contain a dot.
+        doc = self.parse("@defg.sc = [d.sc-g.sc];")
+        [gc] = doc.statements
+        self.assertEqual(gc.name, "defg.sc")
+        expected = {"d.sc", "e.sc", "f.sc", "g.sc"}
+        self.assertEqual(gc.glyphs, expected)
+
+    def test_glyphclass_range_digit1(self):
+        # A single varying digit.
+        doc = self.parse("@range = [foo.2-foo.5];")
+        [gc] = doc.statements
+        expected = {"foo.2", "foo.3", "foo.4", "foo.5"}
+        self.assertEqual(gc.glyphs, expected)
+
+    def test_glyphclass_range_digit2(self):
+        # Two varying digits: generated names keep the leading zero.
+        doc = self.parse("@range = [foo.09-foo.11];")
+        [gc] = doc.statements
+        expected = {"foo.09", "foo.10", "foo.11"}
+        self.assertEqual(gc.glyphs, expected)
+
+    def test_glyphclass_range_digit3(self):
+        # Three-digit names where only the last digit varies.
+        doc = self.parse("@range = [foo.123-foo.125];")
+        [gc] = doc.statements
+        expected = {"foo.123", "foo.124", "foo.125"}
+        self.assertEqual(gc.glyphs, expected)
+
+    def test_glyphclass_range_bad(self):
+        # Each malformed range must be rejected with its own message.
+        cases = [
+            ("@bad = [a-foobar];",
+             "Bad range: \"a\" and \"foobar\" should have the same length"),
+            ("@bad = [A.swash-z.swash];",
+             "Bad range: \"A.swash-z.swash\""),
+            ("@bad = [B.swash-A.swash];",
+             "Start of range must be smaller than its end"),
+            ("@bad = [foo.1234-foo.9876];",
+             "Bad range: \"foo.1234-foo.9876\""),
+        ]
+        for source, message in cases:
+            self.assertRaisesRegex(ParserError, message, self.parse, source)
+
+    def test_glyphclass_range_mixed(self):
+        # Single names and several ranges may be combined in one class.
+        doc = self.parse("@range = [a foo.09-foo.11 X.sc-Z.sc];")
+        [gc] = doc.statements
+        expected = {
+            "a", "foo.09", "foo.10", "foo.11", "X.sc", "Y.sc", "Z.sc"
+        }
+        self.assertEqual(gc.glyphs, expected)
+
+    # TODO: self.parse("@foo = [a b]; @bar = [@foo];")
+    # TODO: self.parse("@foo = [a b]; @bar = @foo;")
+
+    def test_glyphclass_empty(self):
+        # "[]" is accepted and yields a class with no glyphs.
+        doc = self.parse("@empty_set = [];")
+        [gc] = doc.statements
+        self.assertEqual(gc.name, "empty_set")
+        self.assertEqual(gc.glyphs, set())
+
    def test_languagesystem(self):
        [langsys] = self.parse("languagesystem latn DEU;").statements
        self.assertEqual(langsys.script, "latn")