Merge pull request #349 from brawer/glyphnames-beyond-bmp

Support non-BMP characters for synthetic glyph names
2015-09-09 15:03:06 +02:00 · 2015-09-09 15:03:06 +02:00 · 5f5b07b8d7
commit 5f5b07b8d7
parent 67cef432e1 84e722cb6a
5 changed files with 75 additions and 44 deletions
--- a/Lib/fontTools/ttLib/__init__.py
+++ b/Lib/fontTools/ttLib/__init__.py
@@ -511,47 +511,44 @@ class TTFont(object):
 		# Set the glyph order, so the cmap parser has something
 		# to work with (so we don't get called recursively).
 		self.glyphOrder = glyphOrder
-		# Get a (new) temporary cmap (based on the just invented names)
-		try:
-			tempcmap = self['cmap'].getcmap(3, 1)
-		except KeyError:
-			tempcmap = None
-		if tempcmap is not None:
-			# we have a unicode cmap
-			from fontTools import agl
-			cmap = tempcmap.cmap
-			# create a reverse cmap dict
-			reversecmap = {}
-			for unicode, name in list(cmap.items()):
-				reversecmap[name] = unicode
-			allNames = {}
-			for i in range(numGlyphs):
-				tempName = glyphOrder[i]
-				if tempName in reversecmap:
-					unicode = reversecmap[tempName]
-					if unicode in agl.UV2AGL:
-						# get name from the Adobe Glyph List
-						glyphName = agl.UV2AGL[unicode]
-					else:
-						# create uni<CODE> name
-						glyphName = "uni%04X" % unicode
-					tempName = glyphName
-					n = allNames.get(tempName, 0)
-					if n:
-						tempName = glyphName + "#" + str(n)
-					glyphOrder[i] = tempName
-					allNames[tempName] = n + 1
-			# Delete the temporary cmap table from the cache, so it can
-			# be parsed again with the right names.
-			del self.tables['cmap']
-		else:
-			pass # no unicode cmap available, stick with the invented names
+
+		# Make up glyph names based on the reversed cmap table. Because some
+		# glyphs (eg. ligatures or alternates) may not be reachable via cmap,
+		# this naming table will usually not cover all glyphs in the font.
+		# If the font has no Unicode cmap table, reversecmap will be empty.
+		reversecmap = self['cmap'].buildReversed()
+		useCount = {}
+		for i in range(numGlyphs):
+			tempName = glyphOrder[i]
+			if tempName in reversecmap:
+				# If a font maps both U+0041 LATIN CAPITAL LETTER A and
+				# U+0391 GREEK CAPITAL LETTER ALPHA to the same glyph,
+				# we prefer naming the glyph as "A".
+				glyphName = self._makeGlyphName(min(reversecmap[tempName]))
+				numUses = useCount[glyphName] = useCount.get(glyphName, 0) + 1
+				if numUses > 1:
+					glyphName = "%s.alt%d" % (glyphName, numUses - 1)
+				glyphOrder[i] = glyphName
+
+		# Delete the temporary cmap table from the cache, so it can
+		# be parsed again with the right names.
+		del self.tables['cmap']
 		self.glyphOrder = glyphOrder
 		if cmapLoading:
 			# restore partially loaded cmap, so it can continue loading
 			# using the proper names.
 			self.tables['cmap'] = cmapLoading

+	@staticmethod
+	def _makeGlyphName(codepoint):
+		from fontTools import agl  # Adobe Glyph List
+		if codepoint in agl.UV2AGL:
+			return agl.UV2AGL[codepoint]
+		elif codepoint <= 0xFFFF:
+			return "uni%04X" % codepoint
+		else:
+			return "u%X" % codepoint
+
 	def getGlyphNames(self):
 		"""Get a list of glyph names, sorted alphabetically."""
 		glyphNames = sorted(self.getGlyphOrder()[:])
--- a/Lib/fontTools/ttLib/tables/_c_m_a_p.py
+++ b/Lib/fontTools/ttLib/tables/_c_m_a_p.py
@@ -20,6 +20,21 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
 				return subtable
 		return None # not found

+	def buildReversed(self):
+		"""Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}.
+
+		The values are sets of Unicode codepoints because
+		some fonts map different codepoints to the same glyph.
+		For example, U+0041 LATIN CAPITAL LETTER A and U+0391
+		GREEK CAPITAL LETTER ALPHA are sometimes the same glyph.
+		"""
+		result = {}
+		for subtable in self.tables:
+			if subtable.isUnicode():
+				for codepoint, name in subtable.cmap.items():
+					result.setdefault(name, set()).add(codepoint)
+		return result
+
 	def decompile(self, data, ttFont):
 		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
 		self.tableVersion = int(tableVersion)
--- a/Lib/fontTools/ttLib/tables/_c_m_a_p_test.py
+++ b/Lib/fontTools/ttLib/tables/_c_m_a_p_test.py
@@ -2,37 +2,37 @@ from __future__ import print_function, division, absolute_import, unicode_litera
 from fontTools.misc.py23 import *
 from fontTools import ttLib
 import unittest
-from ._c_m_a_p import CmapSubtable
+from ._c_m_a_p import CmapSubtable, table__c_m_a_p

 class CmapSubtableTest(unittest.TestCase):

-	def makeSubtable(self, platformID, platEncID, langID):
-		subtable = CmapSubtable(None)
+	def makeSubtable(self, cmapFormat, platformID, platEncID, langID):
+		subtable = CmapSubtable.newSubtable(cmapFormat)
 		subtable.platformID, subtable.platEncID, subtable.language = (platformID, platEncID, langID)
 		return subtable

 	def test_toUnicode_utf16be(self):
-		subtable = self.makeSubtable(0, 2, 7)
+		subtable = self.makeSubtable(4, 0, 2, 7)
 		self.assertEqual("utf_16_be", subtable.getEncoding())
 		self.assertEqual(True, subtable.isUnicode())

 	def test_toUnicode_macroman(self):
-		subtable = self.makeSubtable(1, 0, 7)  # MacRoman
+		subtable = self.makeSubtable(4, 1, 0, 7)  # MacRoman
 		self.assertEqual("mac_roman", subtable.getEncoding())
 		self.assertEqual(False, subtable.isUnicode())

 	def test_toUnicode_macromanian(self):
-		subtable = self.makeSubtable(1, 0, 37)  # Mac Romanian
+		subtable = self.makeSubtable(4, 1, 0, 37)  # Mac Romanian
 		self.assertNotEqual(None, subtable.getEncoding())
 		self.assertEqual(False, subtable.isUnicode())

 	def test_extended_mac_encodings(self):
-		subtable = self.makeSubtable(1, 1, 0) # Mac Japanese
+		subtable = self.makeSubtable(4, 1, 1, 0) # Mac Japanese
 		self.assertNotEqual(None, subtable.getEncoding())
 		self.assertEqual(False, subtable.isUnicode())

 	def test_extended_unknown(self):
-		subtable = self.makeSubtable(10, 11, 12)
+		subtable = self.makeSubtable(4, 10, 11, 12)
 		self.assertEqual(subtable.getEncoding(), None)
 		self.assertEqual(subtable.getEncoding("ascii"), "ascii")
 		self.assertEqual(subtable.getEncoding(default="xyz"), "xyz")
@@ -49,5 +49,15 @@ class CmapSubtableTest(unittest.TestCase):
 		font.setGlyphOrder([])
 		subtable.decompile(b'\0' * 7 + b'\x10' + b'\0' * 8, font)

+	def test_buildReversed(self):
+		c4 = self.makeSubtable(4, 3, 1, 0)
+		c4.cmap = {0x0041:'A', 0x0391:'A'}
+		c12 = self.makeSubtable(12, 3, 10, 0)
+		c12.cmap = {0x10314: 'u10314'}
+		cmap = table__c_m_a_p()
+		cmap.tables = [c4, c12]
+		self.assertEqual(cmap.buildReversed(), {'A':{0x0041, 0x0391}, 'u10314':{0x10314}})
+
+
 if __name__ == "__main__":
 	unittest.main()
--- a/Lib/fontTools/ttLib/woff2.py
+++ b/Lib/fontTools/ttLib/woff2.py
@@ -227,7 +227,14 @@ class WOFF2Writer(SFNTWriter):
 		"""
 		if self.sfntVersion == "OTTO":
 			return
-		for tag in ('maxp', 'head', 'loca', 'glyf'):
+
+		# make up glyph names required to decompile glyf table
+		self._decompileTable('maxp')
+		numGlyphs = self.ttFont['maxp'].numGlyphs
+		glyphOrder = ['.notdef'] + ["glyph%.5d" % i for i in range(1, numGlyphs)]
+		self.ttFont.setGlyphOrder(glyphOrder)
+
+		for tag in ('head', 'loca', 'glyf'):
 			self._decompileTable(tag)
 		self.ttFont['glyf'].padding = padding
 		for tag in ('glyf', 'loca'):
--- a/Lib/fontTools/ttLib/woff2_test.py
+++ b/Lib/fontTools/ttLib/woff2_test.py
@@ -611,9 +611,11 @@ class WOFF2GlyfTableTest(unittest.TestCase):
 		reader = WOFF2Reader(infile)
 		cls.transformedGlyfData = reader.tables['glyf'].loadData(
 			reader.transformBuffer)
+		cls.glyphOrder = ['.notdef'] + ["glyph%.5d" % i for i in range(1, font['maxp'].numGlyphs)]

 	def setUp(self):
 		self.font = font = ttLib.TTFont(recalcBBoxes=False, recalcTimestamp=False)
+		font.setGlyphOrder(self.glyphOrder)
 		font['head'] = ttLib.getTableClass('head')()
 		font['maxp'] = ttLib.getTableClass('maxp')()
 		font['loca'] = WOFF2LocaTable()