Merge pull request #349 from brawer/glyphnames-beyond-bmp
Support non-BMP characters for synthetic glyph names
This commit is contained in:
commit
5f5b07b8d7
@ -511,47 +511,44 @@ class TTFont(object):
|
||||
# Set the glyph order, so the cmap parser has something
|
||||
# to work with (so we don't get called recursively).
|
||||
self.glyphOrder = glyphOrder
|
||||
# Get a (new) temporary cmap (based on the just invented names)
|
||||
try:
|
||||
tempcmap = self['cmap'].getcmap(3, 1)
|
||||
except KeyError:
|
||||
tempcmap = None
|
||||
if tempcmap is not None:
|
||||
# we have a unicode cmap
|
||||
from fontTools import agl
|
||||
cmap = tempcmap.cmap
|
||||
# create a reverse cmap dict
|
||||
reversecmap = {}
|
||||
for unicode, name in list(cmap.items()):
|
||||
reversecmap[name] = unicode
|
||||
allNames = {}
|
||||
for i in range(numGlyphs):
|
||||
tempName = glyphOrder[i]
|
||||
if tempName in reversecmap:
|
||||
unicode = reversecmap[tempName]
|
||||
if unicode in agl.UV2AGL:
|
||||
# get name from the Adobe Glyph List
|
||||
glyphName = agl.UV2AGL[unicode]
|
||||
else:
|
||||
# create uni<CODE> name
|
||||
glyphName = "uni%04X" % unicode
|
||||
tempName = glyphName
|
||||
n = allNames.get(tempName, 0)
|
||||
if n:
|
||||
tempName = glyphName + "#" + str(n)
|
||||
glyphOrder[i] = tempName
|
||||
allNames[tempName] = n + 1
|
||||
# Delete the temporary cmap table from the cache, so it can
|
||||
# be parsed again with the right names.
|
||||
del self.tables['cmap']
|
||||
else:
|
||||
pass # no unicode cmap available, stick with the invented names
|
||||
|
||||
# Make up glyph names based on the reversed cmap table. Because some
|
||||
# glyphs (eg. ligatures or alternates) may not be reachable via cmap,
|
||||
# this naming table will usually not cover all glyphs in the font.
|
||||
# If the font has no Unicode cmap table, reversecmap will be empty.
|
||||
reversecmap = self['cmap'].buildReversed()
|
||||
useCount = {}
|
||||
for i in range(numGlyphs):
|
||||
tempName = glyphOrder[i]
|
||||
if tempName in reversecmap:
|
||||
# If a font maps both U+0041 LATIN CAPITAL LETTER A and
|
||||
# U+0391 GREEK CAPITAL LETTER ALPHA to the same glyph,
|
||||
# we prefer naming the glyph as "A".
|
||||
glyphName = self._makeGlyphName(min(reversecmap[tempName]))
|
||||
numUses = useCount[glyphName] = useCount.get(glyphName, 0) + 1
|
||||
if numUses > 1:
|
||||
glyphName = "%s.alt%d" % (glyphName, numUses - 1)
|
||||
glyphOrder[i] = glyphName
|
||||
|
||||
# Delete the temporary cmap table from the cache, so it can
|
||||
# be parsed again with the right names.
|
||||
del self.tables['cmap']
|
||||
self.glyphOrder = glyphOrder
|
||||
if cmapLoading:
|
||||
# restore partially loaded cmap, so it can continue loading
|
||||
# using the proper names.
|
||||
self.tables['cmap'] = cmapLoading
|
||||
|
||||
@staticmethod
def _makeGlyphName(codepoint):
    """Return a synthetic glyph name for the given Unicode codepoint.

    The name is chosen as follows:

    * if the codepoint is a key of ``agl.UV2AGL`` (Adobe Glyph List),
      the corresponding entry is returned;
    * otherwise, codepoints up to 0xFFFF yield ``"uni"`` followed by at
      least four uppercase hex digits, e.g. ``"uni0394"``;
    * larger codepoints yield ``"u"`` followed by the uppercase hex
      value without padding, e.g. ``"u10314"``.
    """
    # Imported here rather than at module level, as in the original code.
    from fontTools import agl  # Adobe Glyph List

    if codepoint in agl.UV2AGL:
        return agl.UV2AGL[codepoint]
    if codepoint <= 0xFFFF:
        return "uni%04X" % codepoint
    # Beyond 0xFFFF the "uni" prefix is not used; emit "u" + hex digits.
    return "u%X" % codepoint
|
||||
|
||||
def getGlyphNames(self):
|
||||
"""Get a list of glyph names, sorted alphabetically."""
|
||||
glyphNames = sorted(self.getGlyphOrder()[:])
|
||||
|
@ -20,6 +20,21 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
|
||||
return subtable
|
||||
return None # not found
|
||||
|
||||
def buildReversed(self):
    """Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}.

    The values are sets of Unicode codepoints because
    some fonts map different codepoints to the same glyph.
    For example, U+0041 LATIN CAPITAL LETTER A and U+0391
    GREEK CAPITAL LETTER ALPHA are sometimes the same glyph.

    Only subtables for which ``isUnicode()`` is true contribute to the
    result; if there are none, an empty dict is returned.
    """
    result = {}
    for subtable in self.tables:
        # Skip subtables that do not report themselves as Unicode.
        if not subtable.isUnicode():
            continue
        for codepoint, name in subtable.cmap.items():
            # Merge codepoints from all Unicode subtables per glyph name.
            result.setdefault(name, set()).add(codepoint)
    return result
|
||||
|
||||
def decompile(self, data, ttFont):
|
||||
tableVersion, numSubTables = struct.unpack(">HH", data[:4])
|
||||
self.tableVersion = int(tableVersion)
|
||||
|
@ -2,37 +2,37 @@ from __future__ import print_function, division, absolute_import, unicode_litera
|
||||
from fontTools.misc.py23 import *
|
||||
from fontTools import ttLib
|
||||
import unittest
|
||||
from ._c_m_a_p import CmapSubtable
|
||||
from ._c_m_a_p import CmapSubtable, table__c_m_a_p
|
||||
|
||||
class CmapSubtableTest(unittest.TestCase):
|
||||
|
||||
def makeSubtable(self, platformID, platEncID, langID):
|
||||
subtable = CmapSubtable(None)
|
||||
def makeSubtable(self, cmapFormat, platformID, platEncID, langID):
|
||||
subtable = CmapSubtable.newSubtable(cmapFormat)
|
||||
subtable.platformID, subtable.platEncID, subtable.language = (platformID, platEncID, langID)
|
||||
return subtable
|
||||
|
||||
def test_toUnicode_utf16be(self):
|
||||
subtable = self.makeSubtable(0, 2, 7)
|
||||
subtable = self.makeSubtable(4, 0, 2, 7)
|
||||
self.assertEqual("utf_16_be", subtable.getEncoding())
|
||||
self.assertEqual(True, subtable.isUnicode())
|
||||
|
||||
def test_toUnicode_macroman(self):
|
||||
subtable = self.makeSubtable(1, 0, 7) # MacRoman
|
||||
subtable = self.makeSubtable(4, 1, 0, 7) # MacRoman
|
||||
self.assertEqual("mac_roman", subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_toUnicode_macromanian(self):
|
||||
subtable = self.makeSubtable(1, 0, 37) # Mac Romanian
|
||||
subtable = self.makeSubtable(4, 1, 0, 37) # Mac Romanian
|
||||
self.assertNotEqual(None, subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_extended_mac_encodings(self):
|
||||
subtable = self.makeSubtable(1, 1, 0) # Mac Japanese
|
||||
subtable = self.makeSubtable(4, 1, 1, 0) # Mac Japanese
|
||||
self.assertNotEqual(None, subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_extended_unknown(self):
|
||||
subtable = self.makeSubtable(10, 11, 12)
|
||||
subtable = self.makeSubtable(4, 10, 11, 12)
|
||||
self.assertEqual(subtable.getEncoding(), None)
|
||||
self.assertEqual(subtable.getEncoding("ascii"), "ascii")
|
||||
self.assertEqual(subtable.getEncoding(default="xyz"), "xyz")
|
||||
@ -49,5 +49,15 @@ class CmapSubtableTest(unittest.TestCase):
|
||||
font.setGlyphOrder([])
|
||||
subtable.decompile(b'\0' * 7 + b'\x10' + b'\0' * 8, font)
|
||||
|
||||
def test_buildReversed(self):
    # Two subtables built via makeSubtable(cmapFormat, platformID,
    # platEncID, langID): a format 4 one that maps two codepoints to the
    # same glyph name, and a format 12 one holding a codepoint above 0xFFFF.
    c4 = self.makeSubtable(4, 3, 1, 0)
    c4.cmap = {0x0041:'A', 0x0391:'A'}
    c12 = self.makeSubtable(12, 3, 10, 0)
    c12.cmap = {0x10314: 'u10314'}
    cmap = table__c_m_a_p()
    cmap.tables = [c4, c12]
    # The reversed mapping must merge both subtables and collect every
    # codepoint of a shared glyph into one set.
    self.assertEqual(cmap.buildReversed(), {'A':{0x0041, 0x0391}, 'u10314':{0x10314}})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
@ -227,7 +227,14 @@ class WOFF2Writer(SFNTWriter):
|
||||
"""
|
||||
if self.sfntVersion == "OTTO":
|
||||
return
|
||||
for tag in ('maxp', 'head', 'loca', 'glyf'):
|
||||
|
||||
# make up glyph names required to decompile glyf table
|
||||
self._decompileTable('maxp')
|
||||
numGlyphs = self.ttFont['maxp'].numGlyphs
|
||||
glyphOrder = ['.notdef'] + ["glyph%.5d" % i for i in range(1, numGlyphs)]
|
||||
self.ttFont.setGlyphOrder(glyphOrder)
|
||||
|
||||
for tag in ('head', 'loca', 'glyf'):
|
||||
self._decompileTable(tag)
|
||||
self.ttFont['glyf'].padding = padding
|
||||
for tag in ('glyf', 'loca'):
|
||||
|
@ -611,9 +611,11 @@ class WOFF2GlyfTableTest(unittest.TestCase):
|
||||
reader = WOFF2Reader(infile)
|
||||
cls.transformedGlyfData = reader.tables['glyf'].loadData(
|
||||
reader.transformBuffer)
|
||||
cls.glyphOrder = ['.notdef'] + ["glyph%.5d" % i for i in range(1, font['maxp'].numGlyphs)]
|
||||
|
||||
def setUp(self):
|
||||
self.font = font = ttLib.TTFont(recalcBBoxes=False, recalcTimestamp=False)
|
||||
font.setGlyphOrder(self.glyphOrder)
|
||||
font['head'] = ttLib.getTableClass('head')()
|
||||
font['maxp'] = ttLib.getTableClass('maxp')()
|
||||
font['loca'] = WOFF2LocaTable()
|
||||
|
Loading…
x
Reference in New Issue
Block a user