[OS/2] Add recalcCodePageRanges

Ported from the ufo2ft code (which is itself a port of the FontForge code),
with some additional functions for parity with recalcUnicodeRanges.
This commit is contained in:
Khaled Hosny 2023-11-11 01:22:18 +02:00
parent 6fa1a76e06
commit be8ec35934
2 changed files with 184 additions and 9 deletions

View File

@ -340,6 +340,45 @@ class table_O_S_2f_2(DefaultTable.DefaultTable):
self.setUnicodeRanges(bits)
return bits
def getCodePageRanges(self):
    """Return the set of 'ulCodePageRange*' bits currently enabled.

    Bit indices 0-31 are read from ``ulCodePageRange1``; indices 32-63 are
    read from ``ulCodePageRange2`` (offset by 32).
    """
    low, high = self.ulCodePageRange1, self.ulCodePageRange2
    enabled = {pos for pos in range(32) if low & (1 << pos)}
    enabled.update(pos + 32 for pos in range(32) if high & (1 << pos))
    return enabled
def setCodePageRanges(self, bits):
    """Set the 'ulCodePageRange*' fields to the specified 'bits'.

    Args:
        bits: iterable of integer bit indices. Indices 0-31 are stored in
            ``ulCodePageRange1``; indices 32-63 are stored in
            ``ulCodePageRange2`` (shifted down by 32).

    Raises:
        ValueError: if any index is outside 0..63. Both fields are left
            unmodified in that case, because they are only assigned after
            every index has been validated.
    """
    ul1, ul2 = 0, 0
    for bit in bits:
        if 0 <= bit < 32:
            ul1 |= 1 << bit
        elif 32 <= bit < 64:
            ul2 |= 1 << (bit - 32)
        else:
            # '!r' is the repr conversion. The previous '{bit:r}' was an
            # invalid format spec, so formatting itself failed and the
            # intended message was never shown.
            raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
    self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2
def recalcCodePageRanges(self, ttFont, pruneOnly=False):
    """Recompute the 'ulCodePageRange*' fields from ttFont's 'cmap' table.

    The codepoints of every cmap subtable whose ``isUnicode()`` is true are
    collected and passed to ``calcCodePageRanges``. With ``pruneOnly`` set,
    the result is intersected with the currently enabled bits, so bits can
    be cleared but never newly set.

    The resulting bits are stored via ``setCodePageRanges`` and returned.
    """
    codepoints = set()
    for subtable in ttFont["cmap"].tables:
        if not subtable.isUnicode():
            continue
        codepoints.update(subtable.cmap.keys())

    bits = calcCodePageRanges(codepoints)
    if pruneOnly:
        bits &= self.getCodePageRanges()

    # when no codepage ranges can be enabled, fall back to enabling bit 0
    # (Latin 1) so that the font works in MS Word:
    # https://github.com/googlei18n/fontmake/issues/468
    bits = bits or {0}

    self.setCodePageRanges(bits)
    return bits
def recalcAvgCharWidth(self, ttFont):
"""Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.
@ -611,6 +650,92 @@ def intersectUnicodeRanges(unicodes, inverse=False):
return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits.

    This is a direct translation of the FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158

    Each code page is detected by the presence of one marker character,
    sometimes combined with printable-ASCII coverage and/or a box-drawing
    ("lineart") character.

    Args:
        unicodes: collection of integer codepoints. It is iterated once and
            queried repeatedly with ``in``, so a set is the natural choice.

    Returns:
        A set of integer bit indices in 0..63. The set is empty when no
        marker character is present; no fallback bit is added here.
    """
    # NOTE(review): marker characters outside the Latin/Greek/Cyrillic/
    # Hebrew/Arabic scripts are written as \u escapes so they survive
    # re-encoding; their values follow the FontForge code referenced in the
    # docstring -- confirm against it when touching this table.
    bits = set()
    hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
    hasLineart = ord("\u2524") in unicodes  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    hasSquareRoot = ord("\u221a") in unicodes  # SQUARE ROOT

    for uni in unicodes:
        if uni == ord("Þ") and hasAscii:
            bits.add(0)  # Latin 1
        elif uni == ord("Ľ") and hasAscii:
            bits.add(1)  # Latin 2: Eastern Europe
            if hasLineart:
                bits.add(58)  # Latin 2
        elif uni == ord("Б"):
            bits.add(2)  # Cyrillic
            if ord("Ѕ") in unicodes and hasLineart:
                bits.add(57)  # IBM Cyrillic
            # U+255C BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
            if ord("\u255c") in unicodes and hasLineart:
                bits.add(49)  # MS-DOS Russian
        elif uni == ord("Ά"):
            bits.add(3)  # Greek
            if hasLineart and ord("½") in unicodes:
                bits.add(48)  # IBM Greek
            if hasLineart and hasSquareRoot:
                bits.add(60)  # Greek, former 437 G
        elif uni == ord("İ") and hasAscii:
            bits.add(4)  # Turkish
            if hasLineart:
                bits.add(56)  # IBM turkish
        elif uni == ord("א"):
            bits.add(5)  # Hebrew
            if hasLineart and hasSquareRoot:
                bits.add(53)  # Hebrew
        elif uni == ord("ر"):
            bits.add(6)  # Arabic
            if hasSquareRoot:
                bits.add(51)  # Arabic
            if hasLineart:
                bits.add(61)  # Arabic; ASMO 708
        elif uni == ord("ŗ") and hasAscii:
            bits.add(7)  # Windows Baltic
            if hasLineart:
                bits.add(59)  # MS-DOS Baltic
        elif uni == ord("\u20ab") and hasAscii:  # DONG SIGN
            bits.add(8)  # Vietnamese
        elif uni == ord("\u0e45"):  # THAI CHARACTER LAKKHANGYAO
            bits.add(16)  # Thai
        elif uni == ord("\u30a8"):  # KATAKANA LETTER E
            bits.add(17)  # JIS/Japan
        elif uni == ord("\u3105"):  # BOPOMOFO LETTER B
            bits.add(18)  # Chinese: Simplified
        elif uni == ord("\u3131"):  # HANGUL LETTER KIYEOK
            bits.add(19)  # Korean wansung
        elif uni == ord("\u592e"):  # CJK UNIFIED IDEOGRAPH-592E
            bits.add(20)  # Chinese: Traditional
        elif uni == ord("\uacf4"):  # HANGUL SYLLABLE GOSS
            bits.add(21)  # Korean Johab
        elif uni == ord("\u2665") and hasAscii:  # BLACK HEART SUIT
            bits.add(30)  # OEM Character Set
        # TODO: Symbol bit has a special meaning (check the spec), we need
        # to confirm if this is wanted by default.
        # elif chr(0xF000) <= char <= chr(0xF0FF):
        #    codepageRanges.add(31)          # Symbol Character Set
        elif uni == ord("þ") and hasAscii and hasLineart:
            bits.add(54)  # MS-DOS Icelandic
        elif uni == ord("\u255a") and hasAscii:  # BOX DRAWINGS DOUBLE UP AND RIGHT
            bits.add(62)  # WE/Latin 1
            bits.add(63)  # US
        elif hasAscii and hasLineart and hasSquareRoot:
            if uni == ord("Å"):
                bits.add(50)  # MS-DOS Nordic
            elif uni == ord("é"):
                bits.add(52)  # MS-DOS Canadian French
            elif uni == ord("õ"):
                bits.add(55)  # MS-DOS Portuguese

    # U+2030 PER MILLE SIGN and U+2211 N-ARY SUMMATION
    if hasAscii and ord("\u2030") in unicodes and ord("\u2211") in unicodes:
        bits.add(29)  # Macintosh Character Set (US Roman)

    return bits
if __name__ == "__main__":
import doctest, sys

View File

@ -4,6 +4,18 @@ import unittest
class OS2TableTest(unittest.TestCase):
@staticmethod
def makeOS2_cmap(mapping):
    """Build a TTFont with a new 'OS/2' table and a one-subtable 'cmap'.

    The single cmap subtable is created with ``newSubtable(4)``, tagged
    platformID 3 / platEncID 1 / language 0, and uses ``mapping`` (codepoint
    to glyph name) directly as its ``cmap`` dict.

    Returns the ``(font, os2, cmap)`` triple.
    """
    font = TTFont()
    os2 = newTable("OS/2")
    font["OS/2"] = os2
    cmap = newTable("cmap")
    font["cmap"] = cmap

    subtable = getTableModule("cmap").CmapSubtable.newSubtable(4)
    subtable.platformID = 3
    subtable.platEncID = 1
    subtable.language = 0
    subtable.cmap = mapping

    cmap.tables = [subtable]
    return font, os2, cmap
def test_getUnicodeRanges(self):
table = table_O_S_2f_2()
table.ulUnicodeRange1 = 0xFFFFFFFF
@ -27,14 +39,9 @@ class OS2TableTest(unittest.TestCase):
table.setUnicodeRanges([-1, 127, 255])
def test_recalcUnicodeRanges(self):
font = TTFont()
font["OS/2"] = os2 = newTable("OS/2")
font["cmap"] = cmap = newTable("cmap")
st = getTableModule("cmap").CmapSubtable.newSubtable(4)
st.platformID, st.platEncID, st.language = 3, 1, 0
st.cmap = {0x0041: "A", 0x03B1: "alpha", 0x0410: "Acyr"}
cmap.tables = []
cmap.tables.append(st)
font, os2, cmap = self.makeOS2_cmap(
{0x0041: "A", 0x03B1: "alpha", 0x0410: "Acyr"}
)
os2.setUnicodeRanges({0, 1, 9})
# 'pruneOnly' will clear any bits for which there's no intersection:
# bit 1 ('Latin 1 Supplement'), in this case. However, it won't set
@ -43,7 +50,7 @@ class OS2TableTest(unittest.TestCase):
# try again with pruneOnly=False: bit 7 is now set.
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9})
# add a non-BMP char from 'Mahjong Tiles' block (bit 122)
st.cmap[0x1F000] = "eastwindtile"
cmap.tables[0].cmap[0x1F000] = "eastwindtile"
# the bit 122 and the special bit 57 ('Non Plane 0') are also enabled
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9, 57, 122})
@ -55,6 +62,49 @@ class OS2TableTest(unittest.TestCase):
(set(range(123)) - {9, 57, 122}),
)
def test_getCodePageRanges(self):
    """With both fields all ones, every bit 0..63 must be reported."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = 0xFFFFFFFF
    table.ulCodePageRange2 = 0xFFFFFFFF
    bits = table.getCodePageRanges()
    # Compare against the complete 0..63 set: looping over range(63) left
    # the highest bit (63) unchecked.
    self.assertEqual(bits, set(range(64)))
def test_setCodePageRanges(self):
    """Setting all 64 bits round-trips; out-of-range bits raise ValueError."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = 0
    table.ulCodePageRange2 = 0

    every_bit = set(range(64))
    table.setCodePageRanges(every_bit)
    self.assertEqual(table.getCodePageRanges(), every_bit)

    # one below the valid range, one just above it, and one far above it
    for out_of_range in (-1, 64, 255):
        with self.assertRaises(ValueError):
            table.setCodePageRanges([out_of_range])
def test_recalcCodePageRanges(self):
    """Exercise recalcCodePageRanges with pruning, growth and the fallback."""
    font, os2, cmap = self.makeOS2_cmap(
        {ord("A"): "A", ord("Ά"): "Alphatonos", ord("Б"): "Be"}
    )
    os2.setCodePageRanges({0, 2, 9})
    # With pruneOnly=True, should clear any CodePage for which there are no
    # characters in the cmap.
    self.assertEqual(os2.recalcCodePageRanges(font, pruneOnly=True), {2})
    # With pruneOnly=False, should also set CodePages not initially set.
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3})
    # Add a Korean character, should set CodePage 21 (Korean Johab).
    # U+ACF4 is HANGUL SYLLABLE GOSS, matching the "goss" glyph name; it is
    # written as a codepoint so the literal cannot be lost to re-encoding.
    cmap.tables[0].cmap[0xACF4] = "goss"
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3, 21})
    # Remove all characters from cmap, should still set CodePage 0 (Latin 1)
    cmap.tables[0].cmap = {}
    self.assertEqual(os2.recalcCodePageRanges(font), {0})
if __name__ == "__main__":
import sys