[feaLib] Don’t accept hex/octal numbers everywhere

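Hexadecimal and octal numbers are now accepted only in name, nameid, sizemenuname and Character statements; everywhere else only decimal numbers are accepted.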
This commit is contained in:
Khaled Hosny 2019-08-17 05:53:52 +02:00
parent ae239722d4
commit 197b36fef4
4 changed files with 52 additions and 26 deletions

View File

@ -6,6 +6,9 @@ import os
class Lexer(object): class Lexer(object):
NUMBER = "NUMBER" NUMBER = "NUMBER"
HEXADECIMAL = "HEXADECIMAL"
OCTAL = "OCTAL"
NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
FLOAT = "FLOAT" FLOAT = "FLOAT"
STRING = "STRING" STRING = "STRING"
NAME = "NAME" NAME = "NAME"
@ -123,10 +126,10 @@ class Lexer(object):
if cur_char == "0" and next_char in "xX": if cur_char == "0" and next_char in "xX":
self.pos_ += 2 self.pos_ += 2
self.scan_over_(Lexer.CHAR_HEXDIGIT_) self.scan_over_(Lexer.CHAR_HEXDIGIT_)
return (Lexer.NUMBER, int(text[start:self.pos_], 16), location) return (Lexer.HEXADECIMAL, int(text[start:self.pos_], 16), location)
if cur_char == "0" and next_char in Lexer.CHAR_DIGIT_: if cur_char == "0" and next_char in Lexer.CHAR_DIGIT_:
self.scan_over_(Lexer.CHAR_DIGIT_) self.scan_over_(Lexer.CHAR_DIGIT_)
return (Lexer.NUMBER, int(text[start:self.pos_], 8), location) return (Lexer.OCTAL, int(text[start:self.pos_], 8), location)
if cur_char in Lexer.CHAR_DIGIT_: if cur_char in Lexer.CHAR_DIGIT_:
self.scan_over_(Lexer.CHAR_DIGIT_) self.scan_over_(Lexer.CHAR_DIGIT_)
if self.pos_ >= limit or text[self.pos_] != ".": if self.pos_ >= limit or text[self.pos_] != ".":

View File

@ -976,14 +976,14 @@ class Parser(object):
def parse_name_(self): def parse_name_(self):
platEncID = None platEncID = None
langID = None langID = None
if self.next_token_type_ == Lexer.NUMBER: if self.next_token_type_ in Lexer.NUMBERS:
platformID = self.expect_number_() platformID = self.expect_any_number_()
location = self.cur_token_location_ location = self.cur_token_location_
if platformID not in (1, 3): if platformID not in (1, 3):
raise FeatureLibError("Expected platform id 1 or 3", location) raise FeatureLibError("Expected platform id 1 or 3", location)
if self.next_token_type_ == Lexer.NUMBER: if self.next_token_type_ in Lexer.NUMBERS:
platEncID = self.expect_number_() platEncID = self.expect_any_number_()
langID = self.expect_number_() langID = self.expect_any_number_()
else: else:
platformID = 3 platformID = 3
location = self.cur_token_location_ location = self.cur_token_location_
@ -1006,7 +1006,7 @@ class Parser(object):
def parse_nameid_(self): def parse_nameid_(self):
assert self.cur_token_ == "nameid", self.cur_token_ assert self.cur_token_ == "nameid", self.cur_token_
location, nameID = self.cur_token_location_, self.expect_number_() location, nameID = self.cur_token_location_, self.expect_any_number_()
if nameID > 32767: if nameID > 32767:
raise FeatureLibError("Name id value cannot be greater than 32767", raise FeatureLibError("Name id value cannot be greater than 32767",
self.cur_token_location_) self.cur_token_location_)
@ -1350,7 +1350,7 @@ class Parser(object):
def parse_cvCharacter_(self, tag): def parse_cvCharacter_(self, tag):
assert self.cur_token_ == "Character", self.cur_token_ assert self.cur_token_ == "Character", self.cur_token_
location, character = self.cur_token_location_, self.expect_decimal_or_hexadecimal_() location, character = self.cur_token_location_, self.expect_any_number_()
self.expect_symbol_(";") self.expect_symbol_(";")
if not (0xFFFFFF >= character >= 0): if not (0xFFFFFF >= character >= 0):
raise FeatureLibError("Character value must be between " raise FeatureLibError("Character value must be between "
@ -1556,13 +1556,19 @@ class Parser(object):
return self.cur_token_ return self.cur_token_
raise FeatureLibError("Expected a name", self.cur_token_location_) raise FeatureLibError("Expected a name", self.cur_token_location_)
# TODO: Don't allow this method to accept hexadecimal values
def expect_number_(self): def expect_number_(self):
self.advance_lexer_() self.advance_lexer_()
if self.cur_token_type_ is Lexer.NUMBER: if self.cur_token_type_ is Lexer.NUMBER:
return self.cur_token_ return self.cur_token_
raise FeatureLibError("Expected a number", self.cur_token_location_) raise FeatureLibError("Expected a number", self.cur_token_location_)
def expect_any_number_(self):
self.advance_lexer_()
if self.cur_token_type_ in Lexer.NUMBERS:
return self.cur_token_
raise FeatureLibError("Expected a decimal, hexadecimal or octal number",
self.cur_token_location_)
def expect_float_(self): def expect_float_(self):
self.advance_lexer_() self.advance_lexer_()
if self.cur_token_type_ is Lexer.FLOAT: if self.cur_token_type_ is Lexer.FLOAT:
@ -1570,7 +1576,6 @@ class Parser(object):
raise FeatureLibError("Expected a floating-point number", raise FeatureLibError("Expected a floating-point number",
self.cur_token_location_) self.cur_token_location_)
# TODO: Don't allow this method to accept hexadecimal values
def expect_decipoint_(self): def expect_decipoint_(self):
if self.next_token_type_ == Lexer.FLOAT: if self.next_token_type_ == Lexer.FLOAT:
return self.expect_float_() return self.expect_float_()
@ -1580,18 +1585,6 @@ class Parser(object):
raise FeatureLibError("Expected an integer or floating-point number", raise FeatureLibError("Expected an integer or floating-point number",
self.cur_token_location_) self.cur_token_location_)
def expect_decimal_or_hexadecimal_(self):
# the lexer returns the same token type 'NUMBER' for either decimal or
# hexadecimal integers, and casts them both to a `int` type, so it's
# impossible to distinguish the two here. This method is implemented
# the same as `expect_number_`, only it gives a more informative
# error message
self.advance_lexer_()
if self.cur_token_type_ is Lexer.NUMBER:
return self.cur_token_
raise FeatureLibError("Expected a decimal or hexadecimal number",
self.cur_token_location_)
def expect_string_(self): def expect_string_(self):
self.advance_lexer_() self.advance_lexer_()
if self.cur_token_type_ is Lexer.STRING: if self.cur_token_type_ is Lexer.STRING:

View File

@ -67,9 +67,9 @@ class LexerTest(unittest.TestCase):
def test_number(self): def test_number(self):
self.assertEqual(lex("123 -456"), self.assertEqual(lex("123 -456"),
[(Lexer.NUMBER, 123), (Lexer.NUMBER, -456)]) [(Lexer.NUMBER, 123), (Lexer.NUMBER, -456)])
self.assertEqual(lex("0xCAFED00D"), [(Lexer.NUMBER, 0xCAFED00D)]) self.assertEqual(lex("0xCAFED00D"), [(Lexer.HEXADECIMAL, 0xCAFED00D)])
self.assertEqual(lex("0xcafed00d"), [(Lexer.NUMBER, 0xCAFED00D)]) self.assertEqual(lex("0xcafed00d"), [(Lexer.HEXADECIMAL, 0xCAFED00D)])
self.assertEqual(lex("010"), [(Lexer.NUMBER, 0o10)]) self.assertEqual(lex("010"), [(Lexer.OCTAL, 0o10)])
def test_float(self): def test_float(self):
self.assertEqual(lex("1.23 -4.5"), self.assertEqual(lex("1.23 -4.5"),

View File

@ -1117,6 +1117,36 @@ class ParserTest(unittest.TestCase):
FeatureLibError, "Expected platform id 1 or 3", FeatureLibError, "Expected platform id 1 or 3",
self.parse, 'table name { nameid 9 666 "Foo"; } name;') self.parse, 'table name { nameid 9 666 "Foo"; } name;')
def test_nameid_hexadecimal(self):
doc = self.parse(
r'table name { nameid 0x9 0x3 0x1 0x0409 "Test"; } name;')
name = doc.statements[0].statements[0]
self.assertEqual(name.nameID, 9)
self.assertEqual(name.platformID, 3)
self.assertEqual(name.platEncID, 1)
self.assertEqual(name.langID, 0x0409)
def test_nameid_octal(self):
doc = self.parse(
r'table name { nameid 011 03 012 02011 "Test"; } name;')
name = doc.statements[0].statements[0]
self.assertEqual(name.nameID, 9)
self.assertEqual(name.platformID, 3)
self.assertEqual(name.platEncID, 10)
self.assertEqual(name.langID, 0o2011)
def test_cv_hexadecimal(self):
doc = self.parse(
r'feature cv01 { cvParameters { Character 0x5DDE; }; } cv01;')
cv = doc.statements[0].statements[0].statements[0]
self.assertEqual(cv.character, 0x5DDE)
def test_cv_octal(self):
doc = self.parse(
r'feature cv01 { cvParameters { Character 056736; }; } cv01;')
cv = doc.statements[0].statements[0].statements[0]
self.assertEqual(cv.character, 0o56736)
def test_rsub_format_a(self): def test_rsub_format_a(self):
doc = self.parse("feature test {rsub a [b B] c' d [e E] by C;} test;") doc = self.parse("feature test {rsub a [b B] c' d [e E] by C;} test;")
rsub = doc.statements[0].statements[0] rsub = doc.statements[0].statements[0]