[feaLib] Implement top-level glyph class definitions
parent b86e30d7c6
commit ba238344b1
@@ -6,7 +6,7 @@ def write(buffer, text):
    buffer.write(text.encode("utf-8"))


class FeatureFile:
class FeatureFile(object):
    def __init__(self):
        self.statements = []

@@ -15,7 +15,18 @@ class FeatureFile:
        s.write(out, linesep)


class LanguageSystemStatement:
class GlyphClassDefinition(object):
    def __init__(self, location, name, glyphs):
        self.location = location
        self.name = name
        self.glyphs = glyphs

    def write(self, out, linesep):
        glyphs = " ".join(sorted(self.glyphs))
        write(out, "@%s = [%s];%s" % (self.name, glyphs, linesep))


class LanguageSystemStatement(object):
    def __init__(self, location, script, language):
        self.location = location
        self.script, self.language = (script, language)
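As a quick usage sketch (not part of the diff), the new AST node serializes itself through the module-level write() helper shown above, sorting the glyph names; the import path is the fontTools.feaLib.ast module, and None stands in for a real location tuple:

import io
from fontTools.feaLib.ast import GlyphClassDefinition

gc = GlyphClassDefinition(None, "dash", {"endash", "emdash", "figuredash"})
buf = io.BytesIO()
gc.write(buf, "\n")  # write() encodes the statement to UTF-8 bytes
assert buf.getvalue() == b"@dash = [emdash endash figuredash];\n"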
@@ -23,6 +23,7 @@ class Lexer(object):
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"

@@ -30,7 +31,7 @@ class Lexer(object):

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ";:@-+'{}[]<>()"
    CHAR_SYMBOL_ = ";:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_.\\"

@@ -101,6 +102,13 @@ class Lexer(object):
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1:self.pos_]
            if len(glyphclass) < 1:
                raise LexerError("Expected glyph class name", location)
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
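For illustration (not part of the commit), the new token type shows up when iterating a Lexer directly; the constructor takes the source text and a file name (any string, used only for locations) and yields (type, token, location) triples:

from fontTools.feaLib.lexer import Lexer

tokens = [(typ, tok) for typ, tok, _ in Lexer("@dash = [endash emdash];", "<example>")]
# Expected, given the code above:
# [(Lexer.GLYPHCLASS, "dash"), (Lexer.SYMBOL, "="), (Lexer.SYMBOL, "["),
#  (Lexer.NAME, "endash"), (Lexer.NAME, "emdash"),
#  (Lexer.SYMBOL, "]"), (Lexer.SYMBOL, ";")]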
@@ -20,6 +20,13 @@ class LexerErrorTest(unittest.TestCase):


class LexerTest(unittest.TestCase):
    def __init__(self, methodName):
        unittest.TestCase.__init__(self, methodName)
        # Python 3 renamed assertRaisesRegexp to assertRaisesRegex,
        # and fires deprecation warnings if a program uses the old name.
        if not hasattr(self, "assertRaisesRegex"):
            self.assertRaisesRegex = self.assertRaisesRegexp

    def test_empty(self):
        self.assertEqual(lex(""), [])
        self.assertEqual(lex(" \t "), [])

@@ -34,6 +41,11 @@ class LexerTest(unittest.TestCase):
    def test_cid(self):
        self.assertEqual(lex("\\0 \\987"), [(Lexer.CID, 0), (Lexer.CID, 987)])

    def test_glyphclass(self):
        self.assertEqual(lex("@Vowel.sc"), [(Lexer.GLYPHCLASS, "Vowel.sc")])
        self.assertRaisesRegex(LexerError, "Expected glyph class", lex, "@(a)")
        self.assertRaisesRegex(LexerError, "Expected glyph class", lex, "@ A")

    def test_include(self):
        self.assertEqual(lex("include (~/foo/bar baz.fea);"), [
            (Lexer.NAME, "include"),

@@ -81,7 +93,7 @@ class LexerTest(unittest.TestCase):
                for (_, _, loc) in Lexer(s, "test.fea")]
        self.assertEqual(locs("a b # Comment\n12 @x"), [
            "test.fea:1:1", "test.fea:1:3", "test.fea:2:1",
            "test.fea:2:4", "test.fea:2:5"
            "test.fea:2:4"
        ])

    def test_scan_over_(self):
@@ -1,8 +1,9 @@
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.feaLib.lexer import Lexer, IncludingLexer

import fontTools.feaLib.ast as ast
import os
import re


class ParserError(Exception):
@@ -30,13 +31,46 @@ class Parser(object):

    def parse(self):
        while self.next_token_type_ is not None:
            keyword = self.expect_keyword_({"feature", "languagesystem"})
            if keyword == "languagesystem":
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.GLYPHCLASS:
                self.parse_glyphclass_definition_()
            elif self.is_cur_keyword_("languagesystem"):
                self.parse_languagesystem_()
            elif keyword == "feature":
            elif self.is_cur_keyword_("feature"):
                break  # TODO: Implement
            else:
                raise ParserError("Expected languagesystem, feature, or "
                                  "glyph class definition",
                                  self.cur_token_location_)
        return self.doc_

    def parse_glyphclass_definition_(self):
        location, name = self.cur_token_location_, self.cur_token_
        self.expect_symbol_("=")
        glyphs = self.parse_glyphclass_reference_()
        self.expect_symbol_(";")
        glyphclass = ast.GlyphClassDefinition(location, name, glyphs)
        self.doc_.statements.append(glyphclass)

    def parse_glyphclass_reference_(self):
        result = set()
        self.expect_symbol_("[")
        while self.next_token_ != "]":
            if self.next_token_type_ is Lexer.NAME:
                self.advance_lexer_()
                if self.next_token_ == "-":
                    range_location_ = self.cur_token_location_
                    range_start = self.cur_token_
                    self.expect_symbol_("-")
                    range_end = self.expect_name_()
                    result.update(self.make_glyph_range_(range_location_,
                                                         range_start,
                                                         range_end))
                else:
                    result.add(self.cur_token_)
        self.expect_symbol_("]")
        return result

    def parse_languagesystem_(self):
        location = self.cur_token_location_
        script, language = self.expect_tag_(), self.expect_tag_()
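Concretely, the top-level statements the parser now handles look like this in a feature file (an illustrative snippet assembled from the test cases below, not part of the diff):

languagesystem latn DEU;

@dash = [endash emdash figuredash];
@swashes = [X.swash-Z.swash];
@range = [foo.09-foo.11];

A languagesystem line becomes a LanguageSystemStatement, while each "@name = [...];" line becomes a GlyphClassDefinition whose glyph set has any "-" ranges expanded by make_glyph_range_, defined further down.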
@@ -44,13 +78,8 @@ class Parser(object):
        langsys = ast.LanguageSystemStatement(location, script, language)
        self.doc_.statements.append(langsys)

    def expect_keyword_(self, keywords):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in keywords:
            return self.cur_token_
        s = ", ".join(sorted(list(keywords)))
        raise ParserError("Expected one of %s" % s,
                          self.cur_token_location_)
    def is_cur_keyword_(self, k):
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_tag_(self):
        self.advance_lexer_()

@@ -67,6 +96,12 @@ class Parser(object):
            return symbol
        raise ParserError("Expected '%s'" % symbol, self.cur_token_location_)

    def expect_name_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise ParserError("Expected a name", self.cur_token_location_)

    def advance_lexer_(self):
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_, self.next_token_, self.next_token_location_)
@@ -75,3 +110,45 @@ class Parser(object):
             self.next_token_location_) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)

    def make_glyph_range_(self, location, start, limit):
        """("a.sc", "d.sc") --> {"a.sc", "b.sc", "c.sc", "d.sc"}"""
        result = set()
        if len(start) != len(limit):
            raise ParserError(
                "Bad range: \"%s\" and \"%s\" should have the same length" %
                (start, limit), location)
        rev = lambda s: ''.join(reversed(list(s)))  # string reversal
        prefix = os.path.commonprefix([start, limit])
        suffix = rev(os.path.commonprefix([rev(start), rev(limit)]))
        if len(suffix) > 0:
            start_range = start[len(prefix):-len(suffix)]
            limit_range = limit[len(prefix):-len(suffix)]
        else:
            start_range = start[len(prefix):]
            limit_range = limit[len(prefix):]

        if start_range >= limit_range:
            raise ParserError("Start of range must be smaller than its end",
                              location)

        uppercase = re.compile(r'^[A-Z]$')
        if uppercase.match(start_range) and uppercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.add("%s%c%s" % (prefix, c, suffix))
            return result

        lowercase = re.compile(r'^[a-z]$')
        if lowercase.match(start_range) and lowercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.add("%s%c%s" % (prefix, c, suffix))
            return result

        digits = re.compile(r'^[0-9]{1,3}$')
        if digits.match(start_range) and digits.match(limit_range):
            for i in range(int(start_range, 10), int(limit_range, 10) + 1):
                number = ("000" + str(i))[-len(start_range):]
                result.add("%s%s%s" % (prefix, number, suffix))
            return result

        raise ParserError("Bad range: \"%s-%s\"" % (start, limit), location)
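To make the prefix/suffix splitting concrete, here is a worked trace (not part of the diff) of how the method expands two of the ranges exercised by the tests below:

# make_glyph_range_(loc, "foo.09", "foo.11")
#   prefix = "foo.", suffix = "", start_range = "09", limit_range = "11"
#   digits branch: 9..11, zero-padded back to len("09") characters
#   -> {"foo.09", "foo.10", "foo.11"}
#
# make_glyph_range_(loc, "X.swash", "Z.swash")
#   prefix = "", suffix = ".swash", start_range = "X", limit_range = "Z"
#   uppercase branch: ord("X")..ord("Z")
#   -> {"X.swash", "Y.swash", "Z.swash"}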
@@ -19,6 +19,62 @@ class ParserTest(unittest.TestCase):
        if not hasattr(self, "assertRaisesRegex"):
            self.assertRaisesRegex = self.assertRaisesRegexp

    def test_glyphclass(self):
        [gc] = self.parse("@dash = [endash emdash figuredash];").statements
        self.assertEqual(gc.name, "dash")
        self.assertEqual(gc.glyphs, {"endash", "emdash", "figuredash"})

    def test_glyphclass_range_uppercase(self):
        [gc] = self.parse("@swashes = [X.swash-Z.swash];").statements
        self.assertEqual(gc.name, "swashes")
        self.assertEqual(gc.glyphs, {"X.swash", "Y.swash", "Z.swash"})

    def test_glyphclass_range_lowercase(self):
        [gc] = self.parse("@defg.sc = [d.sc-g.sc];").statements
        self.assertEqual(gc.name, "defg.sc")
        self.assertEqual(gc.glyphs, {"d.sc", "e.sc", "f.sc", "g.sc"})

    def test_glyphclass_range_digit1(self):
        [gc] = self.parse("@range = [foo.2-foo.5];").statements
        self.assertEqual(gc.glyphs, {"foo.2", "foo.3", "foo.4", "foo.5"})

    def test_glyphclass_range_digit2(self):
        [gc] = self.parse("@range = [foo.09-foo.11];").statements
        self.assertEqual(gc.glyphs, {"foo.09", "foo.10", "foo.11"})

    def test_glyphclass_range_digit3(self):
        [gc] = self.parse("@range = [foo.123-foo.125];").statements
        self.assertEqual(gc.glyphs, {"foo.123", "foo.124", "foo.125"})

    def test_glyphclass_range_bad(self):
        self.assertRaisesRegex(
            ParserError,
            "Bad range: \"a\" and \"foobar\" should have the same length",
            self.parse, "@bad = [a-foobar];")
        self.assertRaisesRegex(
            ParserError, "Bad range: \"A.swash-z.swash\"",
            self.parse, "@bad = [A.swash-z.swash];")
        self.assertRaisesRegex(
            ParserError, "Start of range must be smaller than its end",
            self.parse, "@bad = [B.swash-A.swash];")
        self.assertRaisesRegex(
            ParserError, "Bad range: \"foo.1234-foo.9876\"",
            self.parse, "@bad = [foo.1234-foo.9876];")

    def test_glyphclass_range_mixed(self):
        [gc] = self.parse("@range = [a foo.09-foo.11 X.sc-Z.sc];").statements
        self.assertEqual(gc.glyphs, {
            "a", "foo.09", "foo.10", "foo.11", "X.sc", "Y.sc", "Z.sc"
        })

    # TODO: self.parse("@foo = [a b]; @bar = [@foo];")
    # TODO: self.parse("@foo = [a b]; @bar = @foo;")

    def test_glyphclass_empty(self):
        [gc] = self.parse("@empty_set = [];").statements
        self.assertEqual(gc.name, "empty_set")
        self.assertEqual(gc.glyphs, set())

    def test_languagesystem(self):
        [langsys] = self.parse("languagesystem latn DEU;").statements
        self.assertEqual(langsys.script, "latn")