# OS/2 code page range accessors: methods of ``table_O_S_2f_2`` (each takes the
# table instance as ``self``), mirroring the existing *UnicodeRanges methods.


def getCodePageRanges(self):
    """Return the set of 'ulCodePageRange*' bits currently enabled.

    Bits 0-31 are read from ``ulCodePageRange1`` and bits 32-63 from
    ``ulCodePageRange2``.
    """
    bits = set()
    ul1, ul2 = self.ulCodePageRange1, self.ulCodePageRange2
    for i in range(32):
        if ul1 & (1 << i):
            bits.add(i)
        if ul2 & (1 << i):
            bits.add(i + 32)
    return bits


def setCodePageRanges(self, bits):
    """Set the 'ulCodePageRange*' fields to the specified 'bits'.

    'bits' is an iterable of integers in the range 0..63; both fields are
    overwritten, so any bit not listed ends up cleared.

    Raises ValueError if a bit number is outside 0..63.
    """
    ul1, ul2 = 0, 0
    for bit in bits:
        if 0 <= bit < 32:
            ul1 |= 1 << bit
        elif 32 <= bit < 64:
            ul2 |= 1 << (bit - 32)
        else:
            # '!r' is the repr conversion; the former '{bit:r}' was a format
            # spec and failed with "Unknown format code 'r'" instead of
            # producing this message.
            raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
    self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2


def recalcCodePageRanges(self, ttFont, pruneOnly=False):
    """Recalculate the 'ulCodePageRange*' fields from ttFont's 'cmap' table.

    The code points of all Unicode cmap subtables are collected and passed
    to calcCodePageRanges(). If 'pruneOnly' is True, only bits that are
    already enabled are kept: bits can be cleared but never newly set.

    The resulting bits are stored via setCodePageRanges() and returned.
    """
    unicodes = set()
    for table in ttFont["cmap"].tables:
        if table.isUnicode():
            unicodes.update(table.cmap.keys())
    bits = calcCodePageRanges(unicodes)
    if pruneOnly:
        bits &= self.getCodePageRanges()
    # when no codepage ranges can be enabled, fall back to enabling bit 0
    # (Latin 1) so that the font works in MS Word:
    # https://github.com/googlei18n/fontmake/issues/468
    # (this fallback also applies when pruning left nothing enabled)
    if not bits:
        bits = {0}
    self.setCodePageRanges(bits)
    return bits
def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits.

    This is a direct translation of FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158

    Each code page is detected through one marker character (sometimes
    combined with other characters). Several code pages additionally
    require "ASCII" coverage, i.e. every code point in range(0x20, 0x7E),
    and the DOS/OEM ones require the box-drawing character "┤".

    Returns the set of enabled bit numbers; it is empty if nothing matches.
    """

    def has(char):
        # Direct membership test; no need to walk over every code point.
        return ord(char) in unicodes

    bits = set()
    hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
    hasLineart = has("┤")
    hasRadical = has("√")

    if has("Þ") and hasAscii:
        bits.add(0)  # Latin 1
    if has("Ľ") and hasAscii:
        bits.add(1)  # Latin 2: Eastern Europe
        if hasLineart:
            bits.add(58)  # Latin 2
    if has("Б"):
        bits.add(2)  # Cyrillic
        if has("Ѕ") and hasLineart:
            bits.add(57)  # IBM Cyrillic
        if has("╜") and hasLineart:
            bits.add(49)  # MS-DOS Russian
    if has("Ά"):
        bits.add(3)  # Greek
        if hasLineart and has("½"):
            bits.add(48)  # IBM Greek
        if hasLineart and hasRadical:
            bits.add(60)  # Greek, former 437 G
    if has("İ") and hasAscii:
        bits.add(4)  # Turkish
        if hasLineart:
            bits.add(56)  # IBM turkish
    if has("א"):
        bits.add(5)  # Hebrew
        if hasLineart and hasRadical:
            bits.add(53)  # Hebrew
    if has("ر"):
        bits.add(6)  # Arabic
        if hasRadical:
            bits.add(51)  # Arabic
        if hasLineart:
            bits.add(61)  # Arabic; ASMO 708
    if has("ŗ") and hasAscii:
        bits.add(7)  # Windows Baltic
        if hasLineart:
            bits.add(59)  # MS-DOS Baltic
    if has("₫") and hasAscii:
        bits.add(8)  # Vietnamese
    if has("ๅ"):
        bits.add(16)  # Thai
    if has("エ"):
        bits.add(17)  # JIS/Japan
    if has("ㄅ"):
        bits.add(18)  # Chinese: Simplified
    if has("ㄱ"):
        bits.add(19)  # Korean wansung
    if has("央"):
        bits.add(20)  # Chinese: Traditional
    if has("곴"):
        bits.add(21)  # Korean Johab
    if has("♥") and hasAscii:
        bits.add(30)  # OEM Character Set
    # TODO: Symbol bit (31) has a special meaning (check the spec), we need
    # to confirm if this is wanted by default. Until then code points
    # U+F000..U+F0FF do not enable the Symbol Character Set bit.
    if has("þ") and hasAscii and hasLineart:
        bits.add(54)  # MS-DOS Icelandic
    if has("╚") and hasAscii:
        bits.add(62)  # WE/Latin 1
        bits.add(63)  # US
    if hasAscii and hasLineart and hasRadical:
        if has("Å"):
            bits.add(50)  # MS-DOS Nordic
        if has("é"):
            bits.add(52)  # MS-DOS Canadian French
        if has("õ"):
            bits.add(55)  # MS-DOS Portuguese

    if hasAscii and has("‰") and has("∑"):
        bits.add(29)  # Macintosh Character Set (US Roman)

    return bits
# Code page range tests: methods of ``OS2TableTest`` (a ``unittest.TestCase``),
# each taking the test case instance as ``self``.


def test_getCodePageRanges(self):
    """With both fields all-ones, every bit 0..63 must be reported."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = 0xFFFFFFFF
    table.ulCodePageRange2 = 0xFFFFFFFF
    # Compare against the full set: the previous 'range(63)' loop never
    # checked bit 63.
    self.assertEqual(table.getCodePageRanges(), set(range(64)))


def test_setCodePageRanges(self):
    """Setting all 64 bits round-trips; out-of-range bits are rejected."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = 0
    table.ulCodePageRange2 = 0
    bits = set(range(64))
    table.setCodePageRanges(bits)
    self.assertEqual(table.getCodePageRanges(), bits)
    with self.assertRaises(ValueError):
        table.setCodePageRanges([-1])
    with self.assertRaises(ValueError):
        table.setCodePageRanges([64])
    with self.assertRaises(ValueError):
        table.setCodePageRanges([255])


def test_recalcCodePageRanges(self):
    """Recalculation follows the cmap contents, with and without pruning."""
    font, os2, cmap = self.makeOS2_cmap(
        {ord("A"): "A", ord("Ά"): "Alphatonos", ord("Б"): "Be"}
    )
    os2.setCodePageRanges({0, 2, 9})

    # With pruneOnly=True, should clear any CodePage for which there are no
    # characters in the cmap.
    self.assertEqual(os2.recalcCodePageRanges(font, pruneOnly=True), {2})

    # With pruneOnly=False, should also set CodePages not initially set.
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3})

    # Add a Korean character, should set CodePage 21 (Korean Johab)
    cmap.tables[0].cmap[ord("곴")] = "goss"
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3, 21})

    # Remove all characters from cmap, should still set CodePage 0 (Latin 1)
    cmap.tables[0].cmap = {}
    self.assertEqual(os2.recalcCodePageRanges(font), {0})