This removes our internal copy of the Unicode names database! It adds a new API, fontTools.unicode.setUnicodeData(), that can be called with a filename or file object corresponding to a UnicodeData.txt file, which will consequently be used for Unicode character name mapping. This is useful for using a newer Unicode database than the one that comes with the builtin Python module. This also changes behavior such that control characters, Hangul syllables, Han chars, etc., get no name with a custom UnicodeData.txt. We may revisit this, though. Filed https://github.com/behdad/fonttools/issues/82 to add an option to ttx to pass a custom UnicodeData.txt. Fixes https://github.com/behdad/fonttools/issues/81
42 lines
788 B
Python
42 lines
788 B
Python
|
|
def _makeunicodes(f):
    """Build a code point -> character name mapping from UnicodeData.txt lines.

    Args:
        f: An open text file object (or any iterable of lines) whose lines
            are semicolon-separated records: the first field is the
            hexadecimal code point and the second field is the name.

    Returns:
        A dict mapping integer code points to name strings. Records whose
        name field is empty or starts with "<" (such as "<control>") are
        left out.

    Raises:
        ValueError: If a non-blank line has fewer than two fields, or its
            first field is not a hexadecimal number.
    """
    unicodes = {}
    for line in f:
        # Lines keep their trailing newline, so a blank line is "\n" rather
        # than ""; strip first so that blank lines are actually skipped.
        line = line.strip()
        if not line:
            continue
        num, name = line.split(';')[:2]
        if not name or name.startswith('<'):
            continue  # "<control>", etc.
        unicodes[int(num, 16)] = name
    return unicodes
|
|
|
|
|
|
class _UnicodeCustom(object):
    """Character name lookup backed by a user-supplied UnicodeData.txt file."""

    def __init__(self, f):
        """Load the name table.

        Args:
            f: Either a path string, which is opened and closed here, or an
                already-open file object, which is left open for the caller
                to manage.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3 has no basestring
        if isinstance(f, string_types):
            # We opened the file, so we are responsible for closing it.
            with open(f) as stream:
                self.codes = _makeunicodes(stream)
        else:
            self.codes = _makeunicodes(f)

    def __getitem__(self, charCode):
        """Return the name for integer code point charCode, or "????"."""
        return self.codes.get(charCode, "????")
|
|
|
|
class _UnicodeBuiltin(object):
    """Character name lookup backed by Python's builtin unicodedata module."""

    def __getitem__(self, charCode):
        """Return the name for integer code point charCode, or "????".

        "????" is returned when unicodedata has no name for the character,
        or when charCode cannot be converted to a character at all.
        """
        import unicodedata
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3: chr already returns a text character
        try:
            return unicodedata.name(to_char(charCode))
        except ValueError:
            # Raised both for unnamed characters and for out-of-range
            # code points.
            return "????"
|
|
|
|
# Module-level name lookup object: indexing it with an integer code point
# yields the character's name, or "????" when none is known. Defaults to the
# table of Python's builtin unicodedata module; setUnicodeData() rebinds it.
Unicode = _UnicodeBuiltin()
|
|
|
|
def setUnicodeData(f):
    """Switch character name lookups to a custom UnicodeData.txt file.

    Rebinds the module-level ``Unicode`` object to a ``_UnicodeCustom``
    instance built from ``f``.

    Args:
        f: A filename or a file object, passed straight to
            ``_UnicodeCustom``.
    """
    global Unicode
    custom_table = _UnicodeCustom(f)
    Unicode = custom_table
|