diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py index 12270d899..97269e9c8 100644 --- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py +++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py @@ -161,32 +161,41 @@ class table__n_a_m_e(DefaultTable.DefaultTable): raise ValueError("nameID must be less than 32768") return nameID - def addMultilingualName(self, names, ttFont, nameID=None): + def addMultilingualName(self, names, ttFont=None, nameID=None): + """Add a multilingual name, returning its name ID + + 'names' is a dictionary with the name in multiple languages, + such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}. + The keys can be arbitrary IETF BCP 47 language codes; + the values are Unicode strings. + + 'ttFont' is the TTFont to which the names are added, or None. + If present, the font's 'ltag' table can get populated + to store exotic language codes, which allows encoding + names that otherwise cannot get encoded at all. + + 'nameID' is the name ID to be used, or None to let the library + pick an unused name ID. + """ if not hasattr(self, 'names'): self.names = [] if nameID is None: nameID = self._findUnusedNameID() + # TODO: Should minimize BCP 47 language codes. + # https://github.com/fonttools/fonttools/issues/930 for lang, name in sorted(names.items()): - # Add a Macintosh name. See section “The language identifier” in - # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html - macLang = _MAC_LANGUAGE_CODES.get(lang.lower()) - if macLang is not None: - macScript = _MAC_LANGUAGE_TO_SCRIPT[macLang] - self.names.append(makeName(name, nameID, 1, macScript, macLang)) + # Apple platforms have been recognizing Windows names + # since early OSX (~2001), so we only add names + # for the Macintosh platform when we cannot make + # a Windows name. This can happen for exotic BCP47 + # language tags that have no Windows language code. 
+ windowsName = _makeWindowsName(name, nameID, lang) + if windowsName is not None: + self.names.append(windowsName) else: - ltag = ttFont.tables.get("ltag") - if ltag is None: - ltag = ttFont["ltag"] = newTable("ltag") - # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)” - # “The preferred platform-specific code for Unicode would be 3 or 4.” - # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html - self.names.append(makeName(name, nameID, 0, 4, ltag.addTag(lang))) - # Add a Windows name. - windowsLang = _WINDOWS_LANGUAGE_CODES.get(lang.lower()) - if windowsLang is not None: - self.names.append(makeName(name, nameID, 3, 1, windowsLang)) - else: - log.warning("cannot add name in language %s because fonttools does not yet support name table format 1" % lang) + macName = _makeMacName(name, nameID, lang, ttFont) + if macName is not None: + self.names.append(macName) return nameID def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255): @@ -224,6 +233,76 @@ def makeName(string, nameID, platformID, platEncID, langID): return name +def _makeWindowsName(name, nameID, language): + """Create a NameRecord for the Microsoft Windows platform + + 'language' is an arbitrary IETF BCP 47 language identifier such + as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows + does not support the desired language, the result will be None. + Future versions of fonttools might return a NameRecord for the + OpenType 'name' table format 1, but this is not implemented yet. 
+ """ + langID = _WINDOWS_LANGUAGE_CODES.get(language.lower()) + if langID is not None: + return makeName(name, nameID, 3, 1, langID) + else: + log.warning("cannot add Windows name in language %s " + "because fonttools does not yet support " + "name table format 1" % language) + return None + + +def _makeMacName(name, nameID, language, font=None): + """Create a NameRecord for Apple platforms + + 'language' is an arbitrary IETF BCP 47 language identifier such + as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we + create a Macintosh NameRecord that is understood by old applications + (platform ID 1 and an old-style Macintosh language enum). If this + is not possible, we create a Unicode NameRecord (platform ID 0) + whose language points to the font’s 'ltag' table. The latter + can encode any string in any language, but legacy applications + might not recognize the format (in which case they will ignore + those names). + + 'font' should be the TTFont for which you want to create a name. + If 'font' is None, we only return NameRecords for legacy Macintosh; + in that case, the result will be None for names that need to + be encoded with an 'ltag' table. + + See the section “The language identifier” in Apple’s specification: + https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html + """ + macLang = _MAC_LANGUAGE_CODES.get(language.lower()) + macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang) + if macLang is not None and macScript is not None: + encoding = getEncoding(1, macScript, macLang, default="ascii") + # Check if we can actually encode this name. If we can't, + # for example because we have no support for the legacy + # encoding, or because the name string contains Unicode + # characters that the legacy encoding cannot represent, + # we fall back to encoding the name in Unicode and put + # the language tag into the ltag table. 
+ try: + _ = tobytes(name, encoding, errors="strict") + return makeName(name, nameID, 1, macScript, macLang) + except UnicodeEncodeError: + pass + if font is not None: + ltag = font.tables.get("ltag") + if ltag is None: + ltag = font["ltag"] = newTable("ltag") + # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)” + # “The preferred platform-specific code for Unicode would be 3 or 4.” + # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html + return makeName(name, nameID, 0, 4, ltag.addTag(language)) + else: + log.warning("cannot store language %s into 'ltag' table " + "without having access to the TTFont object" % + language) + return None + + class NameRecord(object): def getEncoding(self, default='ascii'): diff --git a/Tests/ttLib/tables/_n_a_m_e_test.py b/Tests/ttLib/tables/_n_a_m_e_test.py index d434f20a7..eb21148c0 100644 --- a/Tests/ttLib/tables/_n_a_m_e_test.py +++ b/Tests/ttLib/tables/_n_a_m_e_test.py @@ -12,6 +12,13 @@ from fontTools.ttLib.tables._n_a_m_e import ( table__n_a_m_e, NameRecord, nameRecordFormat, nameRecordSize, makeName, log) +def names(nameTable): + result = [(n.nameID, n.platformID, n.platEncID, n.langID, n.string) + for n in nameTable.names] + result.sort() + return result + + class NameTableTest(unittest.TestCase): def test_getDebugName(self): @@ -70,32 +77,98 @@ class NameTableTest(unittest.TestCase): table.addName(b"abc") # must be unicode string def test_addMultilingualName(self): + # Microsoft Windows has language codes for “English” (en) + # and for “Standard German as used in Switzerland” (de-CH). + # In this case, we expect that the implementation just + # encodes the name for the Windows platform; Apple platforms + # have been able to decode Windows names since the early days + # of OSX (~2001). 
However, Windows has no language code for + # “Swiss German as used in Liechtenstein” (gsw-LI), so we + # expect that the implementation populates the 'ltag' table + # to represent that particular, rather exotic BCP47 code. font = FakeFont(glyphs=[".notdef", "A"]) nameTable = font.tables['name'] = newTable("name") - widthID = nameTable.addMultilingualName( - {"en": "Width", "de-CH": "Breite", "gsw": "Bräiti"}, - ttFont=font) - xHeightID = nameTable.addMultilingualName( - {"en": "X-Height", "gsw": "X-Hööchi"}, ttFont=font) - self.assertEqual(widthID, 256) - self.assertEqual(xHeightID, 257) - names = [(n.nameID, n.platformID, n.platEncID, n.langID, n.string) - for n in nameTable.names] - names.sort() - self.assertEqual(names, [ - (256, 0, 4, 0, "Breite"), - (256, 0, 4, 1, "Bräiti"), - (256, 1, 0, 0, "Width"), + with CapturingLogHandler(log, "WARNING") as captor: + widthID = nameTable.addMultilingualName({ + "en": "Width", + "de-CH": "Breite", + "gsw-LI": "Bräiti", + }, ttFont=font) + self.assertEqual(widthID, 256) + xHeightID = nameTable.addMultilingualName({ + "en": "X-Height", + "gsw-LI": "X-Hööchi" + }, ttFont=font) + self.assertEqual(xHeightID, 257) + captor.assertRegex("cannot add Windows name in language gsw-LI") + self.assertEqual(names(nameTable), [ + (256, 0, 4, 0, "Bräiti"), (256, 3, 1, 0x0409, "Width"), - (256, 3, 1, 0x0484, "Bräiti"), (256, 3, 1, 0x0807, "Breite"), - (257, 0, 4, 1, "X-Hööchi"), - (257, 1, 0, 0, "X-Height"), + (257, 0, 4, 0, "X-Hööchi"), (257, 3, 1, 0x0409, "X-Height"), - (257, 3, 1, 0x0484, "X-Hööchi"), ]) self.assertEqual(set(font.tables.keys()), {"ltag", "name"}) - self.assertEqual(font["ltag"].tags, ["de-CH", "gsw"]) + self.assertEqual(font["ltag"].tags, ["gsw-LI"]) + + def test_addMultilingualName_legacyMacEncoding(self): + # Windows has no language code for Latin; MacOS has a code; + # and we actually can convert the name to the legacy MacRoman + # encoding. 
In this case, we expect that the name gets encoded + # as Macintosh name (platformID 1) with the corresponding Mac + # language code (131); the 'ltag' table should not be used. + font = FakeFont(glyphs=[".notdef", "A"]) + nameTable = font.tables['name'] = newTable("name") + with CapturingLogHandler(log, "WARNING") as captor: + nameTable.addMultilingualName({"la": "SPQR"}, + ttFont=font) + captor.assertRegex("cannot add Windows name in language la") + self.assertEqual(names(nameTable), [(256, 1, 0, 131, "SPQR")]) + self.assertNotIn("ltag", font.tables.keys()) + + def test_addMultilingualName_legacyMacEncodingButUnencodableName(self): + # Windows has no language code for Latin; MacOS has a code; + # but we cannot encode the name into this encoding because + # it contains characters that are not representable. + # In this case, we expect that the name gets encoded as + # Unicode name (platformID 0) with the language tag being + # added to the 'ltag' table. + font = FakeFont(glyphs=[".notdef", "A"]) + nameTable = font.tables['name'] = newTable("name") + with CapturingLogHandler(log, "WARNING") as captor: + nameTable.addMultilingualName({"la": "ⱾƤℚⱤ"}, + ttFont=font) + captor.assertRegex("cannot add Windows name in language la") + self.assertEqual(names(nameTable), [(256, 0, 4, 0, "ⱾƤℚⱤ")]) + self.assertIn("ltag", font.tables) + self.assertEqual(font["ltag"].tags, ["la"]) + + def test_addMultilingualName_legacyMacEncodingButNoCodec(self): + # Windows has no language code for “Azeri written in the + # Arabic script” (az-Arab); MacOS would have a code (50); + # but we cannot encode the name into the legacy encoding + # because we have no codec for MacArabic in fonttools. + # In this case, we expect that the name gets encoded as + # Unicode name (platformID 0) with the language tag being + # added to the 'ltag' table. 
+ font = FakeFont(glyphs=[".notdef", "A"]) + nameTable = font.tables['name'] = newTable("name") + with CapturingLogHandler(log, "WARNING") as captor: + nameTable.addMultilingualName({"az-Arab": "آذربايجان ديلی"}, + ttFont=font) + captor.assertRegex("cannot add Windows name in language az-Arab") + self.assertEqual(names(nameTable), [(256, 0, 4, 0, "آذربايجان ديلی")]) + self.assertIn("ltag", font.tables) + self.assertEqual(font["ltag"].tags, ["az-Arab"]) + + def test_addMultilingualName_noTTFont(self): + # If the ttFont argument is not passed, the implementation + # should add whatever names it can, but it should not crash + # just because it cannot build an ltag table. + nameTable = newTable("name") + with CapturingLogHandler(log, "WARNING") as captor: + nameTable.addMultilingualName({"en": "A", "la": "ⱾƤℚⱤ"}) + captor.assertRegex("cannot store language la into 'ltag' table") def test_decompile_badOffset(self): # https://github.com/behdad/fonttools/issues/525