Use canonical Python encoding names
This commit is contained in:
parent
5a18fc83e6
commit
702b94f00a
@ -331,9 +331,9 @@ _mac_romanian_mapping = {
|
||||
}
|
||||
|
||||
_extended_encodings = {
|
||||
"x-mac-croatian-ttx": ("ascii", _mac_croatian_mapping),
|
||||
"x-mac-romanian-ttx": ("ascii", _mac_romanian_mapping),
|
||||
"x-mac-japanese-ttx": ("shift_jis", {
|
||||
"x_mac_croatian_ttx": ("ascii", _mac_croatian_mapping),
|
||||
"x_mac_romanian_ttx": ("ascii", _mac_romanian_mapping),
|
||||
"x_mac_japanese_ttx": ("shift_jis", {
|
||||
b"\xFC": unichr(0x007C),
|
||||
b"\x7E": unichr(0x007E),
|
||||
b"\x80": unichr(0x005C),
|
||||
@ -342,14 +342,14 @@ _extended_encodings = {
|
||||
b"\xFE": unichr(0x2122),
|
||||
b"\xFF": unichr(0x2026),
|
||||
}),
|
||||
"x-mac-chinesetrad-ttx": ("big5", {
|
||||
"x_mac_trad_ttx": ("big5", {
|
||||
b"\x80 ": unichr(0x005C),
|
||||
b"\xA0 ": unichr(0x00A0),
|
||||
b"\xFD ": unichr(0x00A9),
|
||||
b"\xFE ": unichr(0x2122),
|
||||
b"\xFF ": unichr(0x2026),
|
||||
}),
|
||||
"x-mac-korean-ttx": ("euc_kr", {
|
||||
"x_mac_korean_ttx": ("euc_kr", {
|
||||
b"\x80 ": unichr(0x00A0),
|
||||
b"\x81 ": unichr(0x20A9),
|
||||
b"\x82 ": unichr(0x2014),
|
||||
@ -357,7 +357,7 @@ _extended_encodings = {
|
||||
b"\xFE ": unichr(0x2122),
|
||||
b"\xFF ": unichr(0x2026),
|
||||
}),
|
||||
"x-mac-chinesesimp-ttx": ("gb2312", {
|
||||
"x_mac_simp_ttx": ("gb2312", {
|
||||
b"\x80 ": unichr(0x00FC),
|
||||
b"\xA0 ": unichr(0x00A0),
|
||||
b"\xFD ": unichr(0x00A9),
|
||||
|
@ -6,15 +6,15 @@ import fontTools.encodings.codecs # Not to be confused with "import codecs"
|
||||
class ExtendedCodecsTest(unittest.TestCase):
|
||||
|
||||
def test_decode_japanese(self):
|
||||
self.assertEqual(b'x\xfe\xfdy'.decode("x-mac-japanese-ttx"),
|
||||
self.assertEqual(b'x\xfe\xfdy'.decode("x_mac_japanese_ttx"),
|
||||
unichr(0x78)+unichr(0x2122)+unichr(0x00A9)+unichr(0x79))
|
||||
|
||||
def test_encode_japanese(self):
|
||||
self.assertEqual(b'x\xfe\xfdy',
|
||||
(unichr(0x78)+unichr(0x2122)+unichr(0x00A9)+unichr(0x79)).encode("x-mac-japanese-ttx"))
|
||||
(unichr(0x78)+unichr(0x2122)+unichr(0x00A9)+unichr(0x79)).encode("x_mac_japanese_ttx"))
|
||||
|
||||
def test_decode_romanian(self):
|
||||
self.assertEqual(b'x\xfb'.decode("x-mac-romanian-ttx"),
|
||||
self.assertEqual(b'x\xfb'.decode("x_mac_romanian_ttx"),
|
||||
unichr(0x78)+unichr(0x02DA))
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -8,57 +8,57 @@ import fontTools.encodings.codecs
|
||||
# Map keyed by platformID, then platEncID, then possibly langID
|
||||
_encodingMap = {
|
||||
0: { # Unicode
|
||||
0: 'utf-16be',
|
||||
1: 'utf-16be',
|
||||
2: 'utf-16be',
|
||||
3: 'utf-16be',
|
||||
4: 'utf-16be',
|
||||
5: 'utf-16be',
|
||||
6: 'utf-16be',
|
||||
0: 'utf_16be',
|
||||
1: 'utf_16be',
|
||||
2: 'utf_16be',
|
||||
3: 'utf_16be',
|
||||
4: 'utf_16be',
|
||||
5: 'utf_16be',
|
||||
6: 'utf_16be',
|
||||
},
|
||||
1: { # Macintosh
|
||||
# See
|
||||
# https://github.com/behdad/fonttools/issues/236
|
||||
0: { # Macintosh, platEncID==0, keyed by langID
|
||||
15: "mac-iceland",
|
||||
17: "mac-turkish",
|
||||
18: "x-mac-croatian-ttx",
|
||||
24: "mac-latin2",
|
||||
25: "mac-latin2",
|
||||
26: "mac-latin2",
|
||||
27: "mac-latin2",
|
||||
28: "mac-latin2",
|
||||
36: "mac-latin2",
|
||||
37: "x-mac-romanian-ttx",
|
||||
38: "mac-latin2",
|
||||
39: "mac-latin2",
|
||||
40: "mac-latin2",
|
||||
Ellipsis: 'mac-roman', # Other
|
||||
15: "mac_iceland",
|
||||
17: "mac_turkish",
|
||||
18: "x_mac_croatian_ttx",
|
||||
24: "mac_latin2",
|
||||
25: "mac_latin2",
|
||||
26: "mac_latin2",
|
||||
27: "mac_latin2",
|
||||
28: "mac_latin2",
|
||||
36: "mac_latin2",
|
||||
37: "x_mac_romanian_ttx",
|
||||
38: "mac_latin2",
|
||||
39: "mac_latin2",
|
||||
40: "mac_latin2",
|
||||
Ellipsis: 'mac_roman', # Other
|
||||
},
|
||||
1: 'x-mac-japanese-ttx',
|
||||
2: 'x-mac-chinesetrad-ttx',
|
||||
3: 'x-mac-korean-ttx',
|
||||
6: 'mac-greek',
|
||||
7: 'mac-cyrillic',
|
||||
25: 'x-mac-chinesesimp-ttx',
|
||||
29: 'mac-latin2',
|
||||
35: 'mac-turkish',
|
||||
37: 'mac-iceland',
|
||||
1: 'x_mac_japanese_ttx',
|
||||
2: 'x_mac_trad_ttx',
|
||||
3: 'x_mac_korean_ttx',
|
||||
6: 'mac_greek',
|
||||
7: 'mac_cyrillic',
|
||||
25: 'x_mac_simp_ttx',
|
||||
29: 'mac_latin2',
|
||||
35: 'mac_turkish',
|
||||
37: 'mac_iceland',
|
||||
},
|
||||
2: { # ISO
|
||||
0: 'ascii',
|
||||
1: 'utf-16be',
|
||||
1: 'utf_16be',
|
||||
2: 'latin1',
|
||||
},
|
||||
3: { # Microsoft
|
||||
0: 'utf-16be',
|
||||
1: 'utf-16be',
|
||||
2: 'shift-jis',
|
||||
0: 'utf_16be',
|
||||
1: 'utf_16be',
|
||||
2: 'shift_jis',
|
||||
3: 'gb2312',
|
||||
4: 'big5',
|
||||
5: 'wansung',
|
||||
5: 'wansung', # XXX This isn't a canonical Python encoding name
|
||||
6: 'johab',
|
||||
10: 'utf-16be',
|
||||
10: 'utf_16be',
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -7,15 +7,15 @@ class EncodingTest(unittest.TestCase):
|
||||
|
||||
def test_encoding_unicode(self):
|
||||
|
||||
self.assertEqual(getEncoding(3, 0, None), "utf-16be") # MS Symbol is Unicode as well
|
||||
self.assertEqual(getEncoding(3, 1, None), "utf-16be")
|
||||
self.assertEqual(getEncoding(3, 10, None), "utf-16be")
|
||||
self.assertEqual(getEncoding(0, 3, None), "utf-16be")
|
||||
self.assertEqual(getEncoding(3, 0, None), "utf_16be") # MS Symbol is Unicode as well
|
||||
self.assertEqual(getEncoding(3, 1, None), "utf_16be")
|
||||
self.assertEqual(getEncoding(3, 10, None), "utf_16be")
|
||||
self.assertEqual(getEncoding(0, 3, None), "utf_16be")
|
||||
|
||||
def test_encoding_macroman_misc(self):
|
||||
self.assertEqual(getEncoding(1, 0, 17), "mac-turkish")
|
||||
self.assertEqual(getEncoding(1, 0, 37), "x-mac-romanian-ttx")
|
||||
self.assertEqual(getEncoding(1, 0, 45), "mac-roman")
|
||||
self.assertEqual(getEncoding(1, 0, 17), "mac_turkish")
|
||||
self.assertEqual(getEncoding(1, 0, 37), "x_mac_romanian_ttx")
|
||||
self.assertEqual(getEncoding(1, 0, 45), "mac_roman")
|
||||
|
||||
def test_extended_mac_encodings(self):
|
||||
encoding = getEncoding(1, 1, 0) # Mac Japanese
|
||||
|
@ -12,12 +12,12 @@ class CmapSubtableTest(unittest.TestCase):
|
||||
|
||||
def test_toUnicode_utf16be(self):
|
||||
subtable = self.makeSubtable(0, 2, 7)
|
||||
self.assertEqual("utf-16be", subtable.getEncoding())
|
||||
self.assertEqual("utf_16be", subtable.getEncoding())
|
||||
self.assertEqual(True, subtable.isUnicode())
|
||||
|
||||
def test_toUnicode_macroman(self):
|
||||
subtable = self.makeSubtable(1, 0, 7) # MacRoman
|
||||
self.assertEqual("mac-roman", subtable.getEncoding())
|
||||
self.assertEqual("mac_roman", subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_toUnicode_macromanian(self):
|
||||
|
@ -102,7 +102,7 @@ class NameRecord(object):
|
||||
return getEncoding(self.platformID, self.platEncID, self.langID, default)
|
||||
|
||||
def encodingIsUnicodeCompatible(self):
|
||||
return self.getEncoding(None) in ['utf-16be', 'ucs2be', 'ascii', 'latin1']
|
||||
return self.getEncoding(None) in ['utf_16be', 'ucs2be', 'ascii', 'latin1']
|
||||
|
||||
def __str__(self):
|
||||
try:
|
||||
|
@ -14,30 +14,30 @@ class NameRecordTest(unittest.TestCase):
|
||||
|
||||
def test_toUnicode_utf16be(self):
|
||||
name = self.makeName("Foo Bold", 111, 0, 2, 7)
|
||||
self.assertEqual("utf-16be", name.getEncoding())
|
||||
self.assertEqual("utf_16be", name.getEncoding())
|
||||
self.assertEqual("Foo Bold", name.toUnicode())
|
||||
|
||||
def test_toUnicode_macroman(self):
|
||||
name = self.makeName("Foo Italic", 222, 1, 0, 7) # MacRoman
|
||||
self.assertEqual("mac-roman", name.getEncoding())
|
||||
self.assertEqual("mac_roman", name.getEncoding())
|
||||
self.assertEqual("Foo Italic", name.toUnicode())
|
||||
|
||||
def test_toUnicode_macromanian(self):
|
||||
name = self.makeName(b"Foo Italic\xfb", 222, 1, 0, 37) # Mac Romanian
|
||||
self.assertEqual("x-mac-romanian-ttx", name.getEncoding())
|
||||
self.assertEqual("x_mac_romanian_ttx", name.getEncoding())
|
||||
self.assertEqual("Foo Italic"+unichr(0x02DA), name.toUnicode())
|
||||
|
||||
def test_toUnicode_UnicodeDecodeError(self):
|
||||
name = self.makeName("Foo Bold", 111, 0, 2, 7)
|
||||
self.assertEqual("utf-16be", name.getEncoding())
|
||||
name.string = b"X" # invalid utf-16be sequence
|
||||
self.assertEqual("utf_16be", name.getEncoding())
|
||||
name.string = b"X" # invalid utf_16be sequence
|
||||
self.assertRaises(UnicodeDecodeError, name.toUnicode)
|
||||
|
||||
def toXML(self, name):
|
||||
writer = XMLWriter(StringIO())
|
||||
name.toXML(writer, ttFont=None)
|
||||
xml = writer.file.getvalue().decode("utf-8").strip()
|
||||
return xml.split(writer.newlinestr.decode("utf-8"))[1:]
|
||||
xml = writer.file.getvalue().decode("utf_8").strip()
|
||||
return xml.split(writer.newlinestr.decode("utf_8"))[1:]
|
||||
|
||||
def test_toXML_utf16be(self):
|
||||
name = self.makeName("Foo Bold", 111, 0, 2, 7)
|
||||
@ -73,11 +73,11 @@ class NameRecordTest(unittest.TestCase):
|
||||
|
||||
def test_encoding_macroman_misc(self):
|
||||
name = self.makeName('', 123, 1, 0, 17) # Mac Turkish
|
||||
self.assertEqual(name.getEncoding(), "mac-turkish")
|
||||
self.assertEqual(name.getEncoding(), "mac_turkish")
|
||||
name.langID = 37
|
||||
self.assertEqual(name.getEncoding(), "x-mac-romanian-ttx")
|
||||
self.assertEqual(name.getEncoding(), "x_mac_romanian_ttx")
|
||||
name.langID = 45 # Other
|
||||
self.assertEqual(name.getEncoding(), "mac-roman")
|
||||
self.assertEqual(name.getEncoding(), "mac_roman")
|
||||
|
||||
def test_extended_mac_encodings(self):
|
||||
name = self.makeName(b'\xfe', 123, 1, 1, 0) # Mac Japanese
|
||||
|
Loading…
x
Reference in New Issue
Block a user