Move getEncoding into fontTools.misc.encodingTools

Logic developed as part of:
https://github.com/behdad/fonttools/pull/208
This commit is contained in:
Behdad Esfahbod 2015-04-19 03:36:20 -07:00
parent 12c3f6ecd4
commit e561b758c1
3 changed files with 106 additions and 62 deletions

View File

@ -0,0 +1,73 @@
"""fontTools.misc.encodingTools.py -- tools for working with OpenType encodings.
"""
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
import fontTools.encodings.codecs
# Python encoding names, indexed first by platformID and then by platEncID.
# A platEncID entry may itself be a dict keyed by langID; in that case the
# Ellipsis key gives the encoding for every langID not listed explicitly.
_encodingMap = {
    # Unicode
    0: dict.fromkeys(range(7), 'utf-16be'),
    # Macintosh
    1: {
        # platEncID == 0 is resolved by langID; see
        # https://github.com/behdad/fonttools/issues/236
        0: {
            15: 'mac-iceland',
            17: 'mac-turkish',
            18: 'x-mac-croatian-ttx',
            24: 'mac-latin2', 25: 'mac-latin2', 26: 'mac-latin2',
            27: 'mac-latin2', 28: 'mac-latin2',
            36: 'mac-latin2',
            37: 'x-mac-romanian-ttx',
            38: 'mac-latin2', 39: 'mac-latin2', 40: 'mac-latin2',
            Ellipsis: 'mac-roman',  # Other
        },
        1: 'x-mac-japanese-ttx',
        2: 'x-mac-chinesetrad-ttx',
        3: 'x-mac-korean-ttx',
        6: 'mac-greek',
        7: 'mac-cyrillic',
        25: 'x-mac-chinesesimp-ttx',
        29: 'mac-latin2',
        35: 'mac-turkish',
        37: 'mac-iceland',
    },
    # ISO
    2: {0: 'ascii', 1: 'utf-16be', 2: 'latin1'},
    # Microsoft
    3: {
        0: 'utf-16be',
        1: 'utf-16be',
        2: 'shift-jis',
        3: 'gb2312',
        4: 'big5',
        5: 'wansung',
        6: 'johab',
        10: 'utf-16be',
    },
}
def getEncoding(platformID, platEncID, langID, default=None):
    """Return the Python encoding name for an OpenType
    platformID/encodingID/langID triplet.

    When no encoding is known for the given platformID/platEncID pair,
    the value of the default argument is used instead (None unless the
    caller overrides it).
    """
    try:
        found = _encodingMap[platformID][platEncID]
    except KeyError:
        found = default
    if not isinstance(found, dict):
        return found
    # A dict entry is a per-language table; its Ellipsis key holds the
    # encoding used for any langID that has no entry of its own.
    fallback = found[Ellipsis]
    return found.get(langID, fallback)

View File

@ -0,0 +1,31 @@
from __future__ import print_function, division, absolute_import, unicode_literals
from fontTools.misc.py23 import *
import unittest
from .encodingTools import getEncoding
class EncodingTest(unittest.TestCase):
    """Unit tests for getEncoding()."""

    def test_encoding_unicode(self):
        # Platform/encoding pairs that are all expected to be UTF-16BE.
        unicodePairs = [
            (3, 0),  # MS Symbol is Unicode as well
            (3, 1),
            (3, 10),
            (0, 3),
        ]
        for platformID, platEncID in unicodePairs:
            self.assertEqual(getEncoding(platformID, platEncID, None), "utf-16be")

    def test_encoding_macroman_misc(self):
        # Macintosh platEncID 0 is resolved by langID; an unlisted langID
        # (45 here) is expected to come back as mac-roman.
        expectedByLangID = [
            (17, "mac-turkish"),
            (37, "x-mac-romanian-ttx"),
            (45, "mac-roman"),
        ]
        for langID, expected in expectedByLangID:
            self.assertEqual(getEncoding(1, 0, langID), expected)

    def test_extended_mac_encodings(self):
        # The codec name returned for Mac Japanese must be usable for
        # decoding: byte 0xFE is expected to decode to U+2122.
        macJapanese = getEncoding(1, 1, 0)  # Mac Japanese
        self.assertEqual(b'\xfe'.decode(macJapanese), unichr(0x2122))

    def test_extended_unknown(self):
        # An unknown triplet gives None, or the caller-supplied default
        # whether it is passed positionally or by keyword.
        unknown = (10, 11, 12)
        self.assertEqual(getEncoding(*unknown), None)
        self.assertEqual(getEncoding(10, 11, 12, "ascii"), "ascii")
        self.assertEqual(getEncoding(10, 11, 12, default="ascii"), "ascii")
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
unittest.main()

View File

@ -2,7 +2,7 @@ from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
from fontTools.misc import sstruct
from fontTools.misc.textTools import safeEval
import fontTools.encodings.codecs
from fontTools.misc.encodingTools import getEncoding
from . import DefaultTable
import struct
@ -93,73 +93,13 @@ class table__n_a_m_e(DefaultTable.DefaultTable):
class NameRecord(object):
# Map of Python encoding names keyed by platformID, then platEncID.
# An entry may itself be a dict keyed by langID (here only Macintosh
# platEncID 0); in such a dict the Ellipsis key holds the encoding for
# any langID that is not listed explicitly.
_encodingMap = {
0: { # Unicode
0: 'utf-16be',
1: 'utf-16be',
2: 'utf-16be',
3: 'utf-16be',
4: 'utf-16be',
5: 'utf-16be',
6: 'utf-16be',
},
1: { # Macintosh
# See
# https://github.com/behdad/fonttools/issues/236
0: { # Macintosh, platEncID==0, keyed by langID
15: "mac-iceland",
17: "mac-turkish",
18: "x-mac-croatian-ttx",
24: "mac-latin2",
25: "mac-latin2",
26: "mac-latin2",
27: "mac-latin2",
28: "mac-latin2",
36: "mac-latin2",
37: "x-mac-romanian-ttx",
38: "mac-latin2",
39: "mac-latin2",
40: "mac-latin2",
Ellipsis: 'mac-roman', # Other
},
1: 'x-mac-japanese-ttx',
2: 'x-mac-chinesetrad-ttx',
3: 'x-mac-korean-ttx',
6: 'mac-greek',
7: 'mac-cyrillic',
25: 'x-mac-chinesesimp-ttx',
29: 'mac-latin2',
35: 'mac-turkish',
37: 'mac-iceland',
},
2: { # ISO
0: 'ascii',
1: 'utf-16be',
2: 'latin1',
},
3: { # Microsoft
0: 'utf-16be',
1: 'utf-16be',
2: 'shift-jis',
3: 'gb2312',
4: 'big5',
5: 'wansung',
6: 'johab',
10: 'utf-16be',
},
}
def getEncoding(self, default='ascii'):
    """Return the Python encoding name for this name entry.

    The lookup is based on this record's platformID, platEncID and
    langID, and is delegated to the module-level getEncoding imported
    from fontTools.misc.encodingTools so the logic lives in one place.
    If no encoding is known for these values, 'ascii' is returned by
    default; that can be overridden by passing a value to the default
    argument.
    """
    # Inside a method body the bare name resolves to the imported
    # module-level function, not to this method.
    return getEncoding(self.platformID, self.platEncID, self.langID, default)
def encodingIsUnicodeCompatible(self):
return self.getEncoding(None) in ['utf-16be', 'ucs2be', 'ascii', 'latin1']