[feaLib] Make nameid parsing more robust
We now correctly handle nameid statements with surrogate pairs and old-style macOS-encoded names (provided that fonttools supports the specified encoding). Resolves https://github.com/fonttools/fonttools/issues/842.
This commit is contained in:
parent
b22df7ff48
commit
eac7ef89c0
@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from fontTools.feaLib.error import FeatureLibError
|
from fontTools.feaLib.error import FeatureLibError
|
||||||
from fontTools.feaLib.lexer import Lexer, IncludingLexer
|
from fontTools.feaLib.lexer import Lexer, IncludingLexer
|
||||||
|
from fontTools.misc.encodingTools import getEncoding
|
||||||
from fontTools.misc.py23 import *
|
from fontTools.misc.py23 import *
|
||||||
import fontTools.feaLib.ast as ast
|
import fontTools.feaLib.ast as ast
|
||||||
import logging
|
import logging
|
||||||
@ -863,14 +864,15 @@ class Parser(object):
|
|||||||
langID = None
|
langID = None
|
||||||
if self.next_token_type_ == Lexer.NUMBER:
|
if self.next_token_type_ == Lexer.NUMBER:
|
||||||
platformID = self.expect_number_()
|
platformID = self.expect_number_()
|
||||||
|
location = self.cur_token_location_
|
||||||
if platformID not in (1, 3):
|
if platformID not in (1, 3):
|
||||||
raise FeatureLibError("Expected platform id 1 or 3",
|
raise FeatureLibError("Expected platform id 1 or 3", location)
|
||||||
self.cur_token_location_)
|
|
||||||
if self.next_token_type_ == Lexer.NUMBER:
|
if self.next_token_type_ == Lexer.NUMBER:
|
||||||
platEncID = self.expect_number_()
|
platEncID = self.expect_number_()
|
||||||
langID = self.expect_number_()
|
langID = self.expect_number_()
|
||||||
else:
|
else:
|
||||||
platformID = 3
|
platformID = 3
|
||||||
|
location = self.cur_token_location_
|
||||||
|
|
||||||
if platformID == 1: # Macintosh
|
if platformID == 1: # Macintosh
|
||||||
platEncID = platEncID or 0 # Roman
|
platEncID = platEncID or 0 # Roman
|
||||||
@ -882,12 +884,11 @@ class Parser(object):
|
|||||||
string = self.expect_string_()
|
string = self.expect_string_()
|
||||||
self.expect_symbol_(";")
|
self.expect_symbol_(";")
|
||||||
|
|
||||||
if platformID == 1 and platEncID == 0:
|
encoding = getEncoding(platformID, platEncID, langID)
|
||||||
string = self.unescape_mac_name_string(string)
|
if encoding is None:
|
||||||
elif platformID == 3 and platEncID == 1:
|
raise FeatureLibError("Unsupported encoding", location)
|
||||||
string = self.unescape_windows_name_string(string)
|
unescaped = self.unescape_string_(string, encoding)
|
||||||
|
return platformID, platEncID, langID, unescaped
|
||||||
return platformID, platEncID, langID, string
|
|
||||||
|
|
||||||
def parse_nameid_(self):
|
def parse_nameid_(self):
|
||||||
assert self.cur_token_ == "nameid", self.cur_token_
|
assert self.cur_token_ == "nameid", self.cur_token_
|
||||||
@ -905,21 +906,27 @@ class Parser(object):
|
|||||||
return self.ast.NameRecord(location, nameID, platformID, platEncID,
|
return self.ast.NameRecord(location, nameID, platformID, platEncID,
|
||||||
langID, string)
|
langID, string)
|
||||||
|
|
||||||
def unescape_mac_name_string(self, string):
|
def unescape_string_(self, string, encoding):
|
||||||
def unescape(match):
|
if encoding == "utf_16_be":
|
||||||
|
s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string)
|
||||||
|
else:
|
||||||
|
unescape = lambda m: self.unescape_byte_(m, encoding)
|
||||||
|
s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string)
|
||||||
|
# We now have a Unicode string, but it might contain surrogate pairs.
|
||||||
|
# We convert surrogates to actual Unicode by round-tripping through
|
||||||
|
# Python's UTF-16 codec in a special mode.
|
||||||
|
utf16 = tobytes(s, "utf_16_be", "surrogatepass")
|
||||||
|
return tounicode(utf16, "utf_16_be")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def unescape_unichr_(match):
|
||||||
n = match.group(0)[1:]
|
n = match.group(0)[1:]
|
||||||
c = bytechr(int(n, 16)).decode('mac_roman')
|
return unichr(int(n, 16))
|
||||||
return c
|
|
||||||
|
|
||||||
return re.sub(r'\\[0-9a-fA-F]{2}', unescape, string)
|
@staticmethod
|
||||||
|
def unescape_byte_(match, encoding):
|
||||||
def unescape_windows_name_string(self, string):
|
|
||||||
def unescape(match):
|
|
||||||
n = match.group(0)[1:]
|
n = match.group(0)[1:]
|
||||||
c = unichr(int(n, 16))
|
return bytechr(int(n, 16)).decode(encoding)
|
||||||
return c
|
|
||||||
|
|
||||||
return re.sub(r'\\[0-9a-fA-F]{4}', unescape, string)
|
|
||||||
|
|
||||||
def parse_table_BASE_(self, table):
|
def parse_table_BASE_(self, table):
|
||||||
statements = table.statements
|
statements = table.statements
|
||||||
|
3
NEWS.rst
3
NEWS.rst
@ -1,4 +1,7 @@
|
|||||||
- [feaLib] include statements now resolve relative paths like makeotf (#838)
|
- [feaLib] include statements now resolve relative paths like makeotf (#838)
|
||||||
|
- [feaLib] `table name` now handles Unicode codepoints beyond the Basic
|
||||||
|
Multilingual Plane, and also supports old-style MacOS platform encodings (#842)
|
||||||
|
- [feaLib] correctly escape string literals when emitting feature syntax (#780)
|
||||||
|
|
||||||
3.7.0 (released 2017-02-11)
|
3.7.0 (released 2017-02-11)
|
||||||
---------------------------
|
---------------------------
|
||||||
|
@ -4,5 +4,9 @@ feature size {
|
|||||||
# 139 - range end (inclusive, decipoints)
|
# 139 - range end (inclusive, decipoints)
|
||||||
sizemenuname "Win MinionPro Size Name";
|
sizemenuname "Win MinionPro Size Name";
|
||||||
sizemenuname 1 "Mac MinionPro Size Name";
|
sizemenuname 1 "Mac MinionPro Size Name";
|
||||||
sizemenuname 1 21 0 "Mac MinionPro Size Name";
|
# The specification says: sizemenuname 1 21 0 "Mac MinionPro Size Name";
|
||||||
|
# which means Macintosh platform, MacOS Thai encoding, English language.
|
||||||
|
# Since fonttools currently does not support the MacOS Thai encoding,
|
||||||
|
# we instead use the MacOS Roman encoding (0) and Swedish language (5) for our test.
|
||||||
|
sizemenuname 1 0 5 "Mac MinionPro Size Name";
|
||||||
} size;
|
} size;
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
<namerecord nameID="256" platformID="1" platEncID="0" langID="0x0" unicode="True">
|
<namerecord nameID="256" platformID="1" platEncID="0" langID="0x0" unicode="True">
|
||||||
Mac MinionPro Size Name
|
Mac MinionPro Size Name
|
||||||
</namerecord>
|
</namerecord>
|
||||||
<namerecord nameID="256" platformID="1" platEncID="21" langID="0x0" unicode="True">
|
<namerecord nameID="256" platformID="1" platEncID="0" langID="0x5" unicode="True">
|
||||||
Mac MinionPro Size Name
|
Mac MinionPro Size Name
|
||||||
</namerecord>
|
</namerecord>
|
||||||
</name>
|
</name>
|
||||||
|
@ -928,12 +928,10 @@ class ParserTest(unittest.TestCase):
|
|||||||
self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";')
|
self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";')
|
||||||
|
|
||||||
def test_nameid_windows_utf16_surroates(self):
|
def test_nameid_windows_utf16_surroates(self):
|
||||||
pass
|
doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
|
||||||
# TODO: https://github.com/fonttools/fonttools/issues/842
|
name = doc.statements[0].statements[0]
|
||||||
# doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
|
self.assertEquals(name.string, r"Carrot 🥕")
|
||||||
# name = doc.statements[0].statements[0]
|
self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
|
||||||
# self.assertEquals(name.string, r"Carrot 🥕")
|
|
||||||
# self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
|
|
||||||
|
|
||||||
def test_nameid_mac_roman(self):
|
def test_nameid_mac_roman(self):
|
||||||
doc = self.parse(
|
doc = self.parse(
|
||||||
@ -956,9 +954,8 @@ class ParserTest(unittest.TestCase):
|
|||||||
self.assertEquals(name.platformID, 1)
|
self.assertEquals(name.platformID, 1)
|
||||||
self.assertEquals(name.platEncID, 0)
|
self.assertEquals(name.platEncID, 0)
|
||||||
self.assertEquals(name.langID, 18)
|
self.assertEquals(name.langID, 18)
|
||||||
# TODO: https://github.com/fonttools/fonttools/issues/842
|
self.assertEquals(name.string, "Jovica Veljović")
|
||||||
# self.assertEquals(name.string, "Jovica Veljović")
|
self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
|
||||||
# self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
|
|
||||||
|
|
||||||
def test_nameid_unsupported_platform(self):
|
def test_nameid_unsupported_platform(self):
|
||||||
self.assertRaisesRegex(
|
self.assertRaisesRegex(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user