Move getEncoding into fontTools.misc.encodingTools
Logic developed as part of: https://github.com/behdad/fonttools/pull/208
This commit is contained in:
parent
12c3f6ecd4
commit
e561b758c1
73
Lib/fontTools/misc/encodingTools.py
Normal file
73
Lib/fontTools/misc/encodingTools.py
Normal file
@ -0,0 +1,73 @@
|
||||
"""fontTools.misc.encodingTools.py -- tools for working with OpenType encodings.
|
||||
"""
|
||||
|
||||
from __future__ import print_function, division, absolute_import
|
||||
from fontTools.misc.py23 import *
|
||||
import fontTools.encodings.codecs
|
||||
|
||||
# Map keyed by platformID, then platEncID, then possibly langID.
# Leaf values are Python codec names.  The 'x-mac-*-ttx' names are not
# standard-library codecs -- presumably they are registered by the
# fontTools.encodings.codecs import in this module; verify before relying on it.
_encodingMap = {
    0: {  # Unicode
        0: 'utf-16be',
        1: 'utf-16be',
        2: 'utf-16be',
        3: 'utf-16be',
        4: 'utf-16be',
        5: 'utf-16be',
        6: 'utf-16be',
    },
    1: {  # Macintosh
        # See
        # https://github.com/behdad/fonttools/issues/236
        0: {  # Macintosh, platEncID==0, keyed by langID
            15: "mac-iceland",
            17: "mac-turkish",
            18: "x-mac-croatian-ttx",
            24: "mac-latin2",
            25: "mac-latin2",
            26: "mac-latin2",
            27: "mac-latin2",
            28: "mac-latin2",
            36: "mac-latin2",
            37: "x-mac-romanian-ttx",
            38: "mac-latin2",
            39: "mac-latin2",
            40: "mac-latin2",
            Ellipsis: 'mac-roman',  # Other
        },
        1: 'x-mac-japanese-ttx',
        2: 'x-mac-chinesetrad-ttx',
        3: 'x-mac-korean-ttx',
        6: 'mac-greek',
        7: 'mac-cyrillic',
        25: 'x-mac-chinesesimp-ttx',
        29: 'mac-latin2',
        35: 'mac-turkish',
        37: 'mac-iceland',
    },
    2: {  # ISO
        0: 'ascii',
        1: 'utf-16be',
        2: 'latin1',
    },
    3: {  # Microsoft
        0: 'utf-16be',
        1: 'utf-16be',
        2: 'shift-jis',
        3: 'gb2312',
        4: 'big5',
        # Wansung: 'wansung' is not a codec name Python can look up, so use
        # the Python codec for that character set instead.
        5: 'euc_kr',
        6: 'johab',
        10: 'utf-16be',
    },
}


def getEncoding(platformID, platEncID, langID, default=None):
    """Return the Python encoding name for an OpenType
    platformID/encodingID/langID triplet.

    platformID and then platEncID are looked up in ``_encodingMap``.  When
    the entry found is itself a dict, it is keyed by langID and langID picks
    the codec, falling back to that dict's ``Ellipsis`` ("other") entry.

    If the platformID/platEncID pair is not known, ``default`` is returned;
    it is None unless overridden by the caller.
    """
    encoding = _encodingMap.get(platformID, {}).get(platEncID, default)
    if isinstance(encoding, dict):
        # Per-language table: langIDs without their own entry share the
        # catch-all stored under Ellipsis.
        encoding = encoding.get(langID, encoding[Ellipsis])
    return encoding
|
31
Lib/fontTools/misc/encodingTools_test.py
Normal file
31
Lib/fontTools/misc/encodingTools_test.py
Normal file
@ -0,0 +1,31 @@
|
||||
from __future__ import print_function, division, absolute_import, unicode_literals
|
||||
from fontTools.misc.py23 import *
|
||||
import unittest
|
||||
from .encodingTools import getEncoding
|
||||
|
||||
class EncodingTest(unittest.TestCase):
    """Checks getEncoding() against platformID/platEncID/langID triplets."""

    def test_encoding_unicode(self):
        # Each (platformID, platEncID) pair below must resolve to UTF-16BE.
        unicode_pairs = (
            (3, 0),  # MS Symbol is Unicode as well
            (3, 1),
            (3, 10),
            (0, 3),
        )
        for platformID, platEncID in unicode_pairs:
            self.assertEqual(getEncoding(platformID, platEncID, None), "utf-16be")

    def test_encoding_macroman_misc(self):
        # Macintosh platEncID 0 is resolved by langID.
        codec_by_lang = (
            (17, "mac-turkish"),
            (37, "x-mac-romanian-ttx"),
            (45, "mac-roman"),
        )
        for langID, codec in codec_by_lang:
            self.assertEqual(getEncoding(1, 0, langID), codec)

    def test_extended_mac_encodings(self):
        mac_japanese = getEncoding(1, 1, 0)  # Mac Japanese
        text = b'\xfe'.decode(mac_japanese)
        self.assertEqual(text, unichr(0x2122))

    def test_extended_unknown(self):
        # The default is None, and can be given positionally or by keyword.
        self.assertEqual(getEncoding(10, 11, 12), None)
        self.assertEqual(getEncoding(10, 11, 12, "ascii"), "ascii")
        self.assertEqual(getEncoding(10, 11, 12, default="ascii"), "ascii")


if __name__ == "__main__":
    unittest.main()
|
@ -2,7 +2,7 @@ from __future__ import print_function, division, absolute_import
|
||||
from fontTools.misc.py23 import *
|
||||
from fontTools.misc import sstruct
|
||||
from fontTools.misc.textTools import safeEval
|
||||
import fontTools.encodings.codecs
|
||||
from fontTools.misc.encodingTools import getEncoding
|
||||
from . import DefaultTable
|
||||
import struct
|
||||
|
||||
@ -93,73 +93,13 @@ class table__n_a_m_e(DefaultTable.DefaultTable):
|
||||
|
||||
class NameRecord(object):
|
||||
|
||||
# Map keyed by platformID, then platEncID, then possibly langID.
# Leaf values are Python codec names.  The 'x-mac-*-ttx' names are not
# standard-library codecs -- presumably they are registered by the
# fontTools.encodings.codecs import in this module; verify before relying on it.
_encodingMap = {
    0: {  # Unicode
        0: 'utf-16be',
        1: 'utf-16be',
        2: 'utf-16be',
        3: 'utf-16be',
        4: 'utf-16be',
        5: 'utf-16be',
        6: 'utf-16be',
    },
    1: {  # Macintosh
        # See
        # https://github.com/behdad/fonttools/issues/236
        0: {  # Macintosh, platEncID==0, keyed by langID
            15: "mac-iceland",
            17: "mac-turkish",
            18: "x-mac-croatian-ttx",
            24: "mac-latin2",
            25: "mac-latin2",
            26: "mac-latin2",
            27: "mac-latin2",
            28: "mac-latin2",
            36: "mac-latin2",
            37: "x-mac-romanian-ttx",
            38: "mac-latin2",
            39: "mac-latin2",
            40: "mac-latin2",
            Ellipsis: 'mac-roman',  # Other
        },
        1: 'x-mac-japanese-ttx',
        2: 'x-mac-chinesetrad-ttx',
        3: 'x-mac-korean-ttx',
        6: 'mac-greek',
        7: 'mac-cyrillic',
        25: 'x-mac-chinesesimp-ttx',
        29: 'mac-latin2',
        35: 'mac-turkish',
        37: 'mac-iceland',
    },
    2: {  # ISO
        0: 'ascii',
        1: 'utf-16be',
        2: 'latin1',
    },
    3: {  # Microsoft
        0: 'utf-16be',
        1: 'utf-16be',
        2: 'shift-jis',
        3: 'gb2312',
        4: 'big5',
        # Wansung: 'wansung' is not a codec name Python can look up, so use
        # the Python codec for that character set instead.
        5: 'euc_kr',
        6: 'johab',
        10: 'utf-16be',
    },
}
|
||||
|
||||
def getEncoding(self, default='ascii'):
    """Return the Python encoding name for this name entry.

    The lookup uses this record's platformID, platEncID and langID and is
    delegated to the module-level getEncoding helper imported from
    fontTools.misc.encodingTools, so the mapping logic lives in one place.

    If the encoding for these values is not known, ``default`` is returned;
    it is 'ascii' unless overridden by the caller.
    """
    # Inside a method body the bare name resolves to the imported
    # module-level function, not to this method.
    return getEncoding(self.platformID, self.platEncID, self.langID, default)
|
||||
|
||||
def encodingIsUnicodeCompatible(self):
    """Tell whether this record's encoding is one of 'utf-16be', 'ucs2be',
    'ascii' or 'latin1'.

    The encoding is requested with a default of None, so a record whose
    encoding is not known is reported as not compatible.
    """
    unicode_compatible = ['utf-16be', 'ucs2be', 'ascii', 'latin1']
    encoding = self.getEncoding(None)
    return encoding in unicode_compatible
|
||||
|
Loading…
x
Reference in New Issue
Block a user