Use canonical Python encoding names

This commit is contained in:
Behdad Esfahbod 2015-04-19 04:24:55 -07:00
parent 5a18fc83e6
commit 702b94f00a
7 changed files with 65 additions and 65 deletions

View File

@@ -331,9 +331,9 @@ _mac_romanian_mapping = {
}
_extended_encodings = {
"x-mac-croatian-ttx": ("ascii", _mac_croatian_mapping),
"x-mac-romanian-ttx": ("ascii", _mac_romanian_mapping),
"x-mac-japanese-ttx": ("shift_jis", {
"x_mac_croatian_ttx": ("ascii", _mac_croatian_mapping),
"x_mac_romanian_ttx": ("ascii", _mac_romanian_mapping),
"x_mac_japanese_ttx": ("shift_jis", {
b"\xFC": unichr(0x007C),
b"\x7E": unichr(0x007E),
b"\x80": unichr(0x005C),
@@ -342,14 +342,14 @@ _extended_encodings = {
b"\xFE": unichr(0x2122),
b"\xFF": unichr(0x2026),
}),
"x-mac-chinesetrad-ttx": ("big5", {
"x_mac_trad_ttx": ("big5", {
b"\x80 ": unichr(0x005C),
b"\xA0 ": unichr(0x00A0),
b"\xFD ": unichr(0x00A9),
b"\xFE ": unichr(0x2122),
b"\xFF ": unichr(0x2026),
}),
"x-mac-korean-ttx": ("euc_kr", {
"x_mac_korean_ttx": ("euc_kr", {
b"\x80 ": unichr(0x00A0),
b"\x81 ": unichr(0x20A9),
b"\x82 ": unichr(0x2014),
@@ -357,7 +357,7 @@ _extended_encodings = {
b"\xFE ": unichr(0x2122),
b"\xFF ": unichr(0x2026),
}),
"x-mac-chinesesimp-ttx": ("gb2312", {
"x_mac_simp_ttx": ("gb2312", {
b"\x80 ": unichr(0x00FC),
b"\xA0 ": unichr(0x00A0),
b"\xFD ": unichr(0x00A9),

View File

@@ -6,15 +6,15 @@ import fontTools.encodings.codecs # Not to be confused with "import codecs"
class ExtendedCodecsTest(unittest.TestCase):
def test_decode_japanese(self):
self.assertEqual(b'x\xfe\xfdy'.decode("x-mac-japanese-ttx"),
self.assertEqual(b'x\xfe\xfdy'.decode("x_mac_japanese_ttx"),
unichr(0x78)+unichr(0x2122)+unichr(0x00A9)+unichr(0x79))
def test_encode_japanese(self):
self.assertEqual(b'x\xfe\xfdy',
(unichr(0x78)+unichr(0x2122)+unichr(0x00A9)+unichr(0x79)).encode("x-mac-japanese-ttx"))
(unichr(0x78)+unichr(0x2122)+unichr(0x00A9)+unichr(0x79)).encode("x_mac_japanese_ttx"))
def test_decode_romanian(self):
self.assertEqual(b'x\xfb'.decode("x-mac-romanian-ttx"),
self.assertEqual(b'x\xfb'.decode("x_mac_romanian_ttx"),
unichr(0x78)+unichr(0x02DA))
if __name__ == '__main__':

View File

@@ -8,57 +8,57 @@ import fontTools.encodings.codecs
# Map keyed by platformID, then platEncID, then possibly langID
_encodingMap = {
0: { # Unicode
0: 'utf-16be',
1: 'utf-16be',
2: 'utf-16be',
3: 'utf-16be',
4: 'utf-16be',
5: 'utf-16be',
6: 'utf-16be',
0: 'utf_16be',
1: 'utf_16be',
2: 'utf_16be',
3: 'utf_16be',
4: 'utf_16be',
5: 'utf_16be',
6: 'utf_16be',
},
1: { # Macintosh
# See
# https://github.com/behdad/fonttools/issues/236
0: { # Macintosh, platEncID==0, keyed by langID
15: "mac-iceland",
17: "mac-turkish",
18: "x-mac-croatian-ttx",
24: "mac-latin2",
25: "mac-latin2",
26: "mac-latin2",
27: "mac-latin2",
28: "mac-latin2",
36: "mac-latin2",
37: "x-mac-romanian-ttx",
38: "mac-latin2",
39: "mac-latin2",
40: "mac-latin2",
Ellipsis: 'mac-roman', # Other
15: "mac_iceland",
17: "mac_turkish",
18: "x_mac_croatian_ttx",
24: "mac_latin2",
25: "mac_latin2",
26: "mac_latin2",
27: "mac_latin2",
28: "mac_latin2",
36: "mac_latin2",
37: "x_mac_romanian_ttx",
38: "mac_latin2",
39: "mac_latin2",
40: "mac_latin2",
Ellipsis: 'mac_roman', # Other
},
1: 'x-mac-japanese-ttx',
2: 'x-mac-chinesetrad-ttx',
3: 'x-mac-korean-ttx',
6: 'mac-greek',
7: 'mac-cyrillic',
25: 'x-mac-chinesesimp-ttx',
29: 'mac-latin2',
35: 'mac-turkish',
37: 'mac-iceland',
1: 'x_mac_japanese_ttx',
2: 'x_mac_trad_ttx',
3: 'x_mac_korean_ttx',
6: 'mac_greek',
7: 'mac_cyrillic',
25: 'x_mac_simp_ttx',
29: 'mac_latin2',
35: 'mac_turkish',
37: 'mac_iceland',
},
2: { # ISO
0: 'ascii',
1: 'utf-16be',
1: 'utf_16be',
2: 'latin1',
},
3: { # Microsoft
0: 'utf-16be',
1: 'utf-16be',
2: 'shift-jis',
0: 'utf_16be',
1: 'utf_16be',
2: 'shift_jis',
3: 'gb2312',
4: 'big5',
5: 'wansung',
5: 'wansung', # XXX This isn't a canonical Python encoding name
6: 'johab',
10: 'utf-16be',
10: 'utf_16be',
},
}

View File

@@ -7,15 +7,15 @@ class EncodingTest(unittest.TestCase):
def test_encoding_unicode(self):
self.assertEqual(getEncoding(3, 0, None), "utf-16be") # MS Symbol is Unicode as well
self.assertEqual(getEncoding(3, 1, None), "utf-16be")
self.assertEqual(getEncoding(3, 10, None), "utf-16be")
self.assertEqual(getEncoding(0, 3, None), "utf-16be")
self.assertEqual(getEncoding(3, 0, None), "utf_16be") # MS Symbol is Unicode as well
self.assertEqual(getEncoding(3, 1, None), "utf_16be")
self.assertEqual(getEncoding(3, 10, None), "utf_16be")
self.assertEqual(getEncoding(0, 3, None), "utf_16be")
def test_encoding_macroman_misc(self):
self.assertEqual(getEncoding(1, 0, 17), "mac-turkish")
self.assertEqual(getEncoding(1, 0, 37), "x-mac-romanian-ttx")
self.assertEqual(getEncoding(1, 0, 45), "mac-roman")
self.assertEqual(getEncoding(1, 0, 17), "mac_turkish")
self.assertEqual(getEncoding(1, 0, 37), "x_mac_romanian_ttx")
self.assertEqual(getEncoding(1, 0, 45), "mac_roman")
def test_extended_mac_encodings(self):
encoding = getEncoding(1, 1, 0) # Mac Japanese

View File

@@ -12,12 +12,12 @@ class CmapSubtableTest(unittest.TestCase):
def test_toUnicode_utf16be(self):
subtable = self.makeSubtable(0, 2, 7)
self.assertEqual("utf-16be", subtable.getEncoding())
self.assertEqual("utf_16be", subtable.getEncoding())
self.assertEqual(True, subtable.isUnicode())
def test_toUnicode_macroman(self):
subtable = self.makeSubtable(1, 0, 7) # MacRoman
self.assertEqual("mac-roman", subtable.getEncoding())
self.assertEqual("mac_roman", subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_toUnicode_macromanian(self):

View File

@@ -102,7 +102,7 @@ class NameRecord(object):
return getEncoding(self.platformID, self.platEncID, self.langID, default)
def encodingIsUnicodeCompatible(self):
return self.getEncoding(None) in ['utf-16be', 'ucs2be', 'ascii', 'latin1']
return self.getEncoding(None) in ['utf_16be', 'ucs2be', 'ascii', 'latin1']
def __str__(self):
try:

View File

@@ -14,30 +14,30 @@ class NameRecordTest(unittest.TestCase):
def test_toUnicode_utf16be(self):
name = self.makeName("Foo Bold", 111, 0, 2, 7)
self.assertEqual("utf-16be", name.getEncoding())
self.assertEqual("utf_16be", name.getEncoding())
self.assertEqual("Foo Bold", name.toUnicode())
def test_toUnicode_macroman(self):
name = self.makeName("Foo Italic", 222, 1, 0, 7) # MacRoman
self.assertEqual("mac-roman", name.getEncoding())
self.assertEqual("mac_roman", name.getEncoding())
self.assertEqual("Foo Italic", name.toUnicode())
def test_toUnicode_macromanian(self):
name = self.makeName(b"Foo Italic\xfb", 222, 1, 0, 37) # Mac Romanian
self.assertEqual("x-mac-romanian-ttx", name.getEncoding())
self.assertEqual("x_mac_romanian_ttx", name.getEncoding())
self.assertEqual("Foo Italic"+unichr(0x02DA), name.toUnicode())
def test_toUnicode_UnicodeDecodeError(self):
name = self.makeName("Foo Bold", 111, 0, 2, 7)
self.assertEqual("utf-16be", name.getEncoding())
name.string = b"X" # invalid utf-16be sequence
self.assertEqual("utf_16be", name.getEncoding())
name.string = b"X" # invalid utf_16be sequence
self.assertRaises(UnicodeDecodeError, name.toUnicode)
def toXML(self, name):
writer = XMLWriter(StringIO())
name.toXML(writer, ttFont=None)
xml = writer.file.getvalue().decode("utf-8").strip()
return xml.split(writer.newlinestr.decode("utf-8"))[1:]
xml = writer.file.getvalue().decode("utf_8").strip()
return xml.split(writer.newlinestr.decode("utf_8"))[1:]
def test_toXML_utf16be(self):
name = self.makeName("Foo Bold", 111, 0, 2, 7)
@@ -73,11 +73,11 @@ class NameRecordTest(unittest.TestCase):
def test_encoding_macroman_misc(self):
name = self.makeName('', 123, 1, 0, 17) # Mac Turkish
self.assertEqual(name.getEncoding(), "mac-turkish")
self.assertEqual(name.getEncoding(), "mac_turkish")
name.langID = 37
self.assertEqual(name.getEncoding(), "x-mac-romanian-ttx")
self.assertEqual(name.getEncoding(), "x_mac_romanian_ttx")
name.langID = 45 # Other
self.assertEqual(name.getEncoding(), "mac-roman")
self.assertEqual(name.getEncoding(), "mac_roman")
def test_extended_mac_encodings(self):
name = self.makeName(b'\xfe', 123, 1, 1, 0) # Mac Japanese