[OS/2] Add recalcCodePageRanges
Ported from the ufo2ft code (which is itself a port of the FontForge code), with some additional functions for parity with recalcUnicodeRanges.
This commit is contained in:
parent
6fa1a76e06
commit
be8ec35934
@ -340,6 +340,45 @@ class table_O_S_2f_2(DefaultTable.DefaultTable):
|
||||
self.setUnicodeRanges(bits)
|
||||
return bits
|
||||
|
||||
def getCodePageRanges(self):
    """Return the set of 'ulCodePageRange*' bits currently enabled.

    Bits 0-31 are read from ``ulCodePageRange1`` and bits 32-63 from
    ``ulCodePageRange2``; only the low 32 bits of each field are inspected.
    """
    low, high = self.ulCodePageRange1, self.ulCodePageRange2
    enabled = {pos for pos in range(32) if (low >> pos) & 1}
    # Bits of the second field are numbered after the 32 bits of the first.
    enabled.update(pos + 32 for pos in range(32) if (high >> pos) & 1)
    return enabled
|
||||
|
||||
def setCodePageRanges(self, bits):
    """Set the 'ulCodePageRange*' fields to the specified 'bits'.

    Args:
        bits: iterable of bit numbers, each in the range 0..63. Bits 0-31
            are stored in ``ulCodePageRange1``, bits 32-63 in
            ``ulCodePageRange2``. Any bit not listed is cleared.

    Raises:
        ValueError: if a bit number falls outside 0..63. Both fields are
            left unmodified in that case, because they are only assigned
            after every bit has been validated.
    """
    ul1, ul2 = 0, 0
    for bit in bits:
        if 0 <= bit < 32:
            ul1 |= 1 << bit
        elif 32 <= bit < 64:
            ul2 |= 1 << (bit - 32)
        else:
            # '!r' requests the repr() conversion. The previous '{bit:r}'
            # was an invalid *format spec*, so the intended message was
            # never produced (formatting itself failed instead).
            raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
    self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2
|
||||
|
||||
def recalcCodePageRanges(self, ttFont, pruneOnly=False):
    """Recompute the 'ulCodePageRange*' bits from ttFont's 'cmap' table.

    The codepoints of every cmap subtable whose ``isUnicode()`` is true are
    gathered and handed to ``calcCodePageRanges``. When 'pruneOnly' is true,
    the computed bits are intersected with the currently enabled ones, so
    bits can only be cleared, never newly set.

    The resulting set of bits is stored on the table and returned.
    """
    codepoints = set()
    for subtable in ttFont["cmap"].tables:
        if not subtable.isUnicode():
            continue
        codepoints.update(subtable.cmap.keys())

    ranges = calcCodePageRanges(codepoints)
    if pruneOnly:
        ranges &= self.getCodePageRanges()

    # when no codepage ranges can be enabled, fall back to enabling bit 0
    # (Latin 1) so that the font works in MS Word:
    # https://github.com/googlei18n/fontmake/issues/468
    ranges = ranges or {0}

    self.setCodePageRanges(ranges)
    return ranges
|
||||
|
||||
def recalcAvgCharWidth(self, ttFont):
|
||||
"""Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.
|
||||
|
||||
@ -611,6 +650,92 @@ def intersectUnicodeRanges(unicodes, inverse=False):
|
||||
return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
|
||||
|
||||
|
||||
def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits.

    Each code page is detected through the presence of one marker
    character, in some cases further qualified by coverage of the ASCII
    codepoints 0x20 up to (but not including) 0x7E, by the box-drawing
    character U+2524 and/or by other helper characters. The result is a
    set of bit numbers.

    The detection rules follow the FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158
    """

    def present(char):
        # True when the codepoint of 'char' is among the input codepoints.
        return ord(char) in unicodes

    asciiCovered = set(range(0x20, 0x7E)).issubset(unicodes)
    boxDrawing = present("┤")
    squareRoot = present("√")

    found = set()

    # --- Code pages recognised regardless of ASCII coverage ---------------
    if present("Б"):
        found.add(2)  # Cyrillic
        if boxDrawing and present("Ѕ"):
            found.add(57)  # IBM Cyrillic
        if boxDrawing and present("╜"):
            found.add(49)  # MS-DOS Russian
    if present("Ά"):
        found.add(3)  # Greek
        if boxDrawing and present("½"):
            found.add(48)  # IBM Greek
        if boxDrawing and squareRoot:
            found.add(60)  # Greek, former 437 G
    if present("א"):
        found.add(5)  # Hebrew
        if boxDrawing and squareRoot:
            found.add(53)  # Hebrew
    if present("ر"):
        found.add(6)  # Arabic
        if squareRoot:
            found.add(51)  # Arabic
        if boxDrawing:
            found.add(61)  # Arabic; ASMO 708

    # Single marker character, single bit, no further condition.
    simpleMarkers = (
        ("ๅ", 16),  # Thai
        ("エ", 17),  # JIS/Japan
        ("ㄅ", 18),  # Chinese: Simplified
        ("ㄱ", 19),  # Korean wansung
        ("央", 20),  # Chinese: Traditional
        ("곴", 21),  # Korean Johab
    )
    for marker, bit in simpleMarkers:
        if present(marker):
            found.add(bit)

    # --- Code pages that additionally require ASCII coverage --------------
    if asciiCovered:
        if present("Þ"):
            found.add(0)  # Latin 1
        if present("Ľ"):
            found.add(1)  # Latin 2: Eastern Europe
            if boxDrawing:
                found.add(58)  # Latin 2
        if present("İ"):
            found.add(4)  # Turkish
            if boxDrawing:
                found.add(56)  # IBM turkish
        if present("ŗ"):
            found.add(7)  # Windows Baltic
            if boxDrawing:
                found.add(59)  # MS-DOS Baltic
        if present("₫"):
            found.add(8)  # Vietnamese
        if present("♥"):
            found.add(30)  # OEM Character Set
        # TODO: Symbol bit has a special meaning (check the spec), we need
        # to confirm if this is wanted by default (bit 31, Symbol Character
        # Set, would be keyed on codepoints 0xF000..0xF0FF).
        if present("╚"):
            found.add(62)  # WE/Latin 1
            found.add(63)  # US
        if present("‰") and present("∑"):
            found.add(29)  # Macintosh Character Set (US Roman)

        if boxDrawing:
            if present("þ"):
                found.add(54)  # MS-DOS Icelandic
            if squareRoot:
                if present("Å"):
                    found.add(50)  # MS-DOS Nordic
                if present("é"):
                    found.add(52)  # MS-DOS Canadian French
                if present("õ"):
                    found.add(55)  # MS-DOS Portuguese

    return found
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest, sys
|
||||
|
||||
|
@ -4,6 +4,18 @@ import unittest
|
||||
|
||||
|
||||
class OS2TableTest(unittest.TestCase):
|
||||
@staticmethod
def makeOS2_cmap(mapping):
    """Build a minimal font fixture for the recalc* tests.

    Creates a TTFont holding a new 'OS/2' table and a new 'cmap' table
    whose only subtable is created with ``newSubtable(4)``, has
    platformID=3, platEncID=1, language=0, and uses 'mapping' as its cmap.

    Returns the tuple ``(font, os2, cmap)``.
    """
    subtable = getTableModule("cmap").CmapSubtable.newSubtable(4)
    subtable.platformID = 3
    subtable.platEncID = 1
    subtable.language = 0
    subtable.cmap = mapping

    font = TTFont()
    os2 = newTable("OS/2")
    font["OS/2"] = os2
    cmap = newTable("cmap")
    font["cmap"] = cmap
    cmap.tables = [subtable]
    return font, os2, cmap
|
||||
|
||||
def test_getUnicodeRanges(self):
|
||||
table = table_O_S_2f_2()
|
||||
table.ulUnicodeRange1 = 0xFFFFFFFF
|
||||
@ -27,14 +39,9 @@ class OS2TableTest(unittest.TestCase):
|
||||
table.setUnicodeRanges([-1, 127, 255])
|
||||
|
||||
def test_recalcUnicodeRanges(self):
|
||||
font = TTFont()
|
||||
font["OS/2"] = os2 = newTable("OS/2")
|
||||
font["cmap"] = cmap = newTable("cmap")
|
||||
st = getTableModule("cmap").CmapSubtable.newSubtable(4)
|
||||
st.platformID, st.platEncID, st.language = 3, 1, 0
|
||||
st.cmap = {0x0041: "A", 0x03B1: "alpha", 0x0410: "Acyr"}
|
||||
cmap.tables = []
|
||||
cmap.tables.append(st)
|
||||
font, os2, cmap = self.makeOS2_cmap(
|
||||
{0x0041: "A", 0x03B1: "alpha", 0x0410: "Acyr"}
|
||||
)
|
||||
os2.setUnicodeRanges({0, 1, 9})
|
||||
# 'pruneOnly' will clear any bits for which there's no intersection:
|
||||
# bit 1 ('Latin 1 Supplement'), in this case. However, it won't set
|
||||
@ -43,7 +50,7 @@ class OS2TableTest(unittest.TestCase):
|
||||
# try again with pruneOnly=False: bit 7 is now set.
|
||||
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9})
|
||||
# add a non-BMP char from 'Mahjong Tiles' block (bit 122)
|
||||
st.cmap[0x1F000] = "eastwindtile"
|
||||
cmap.tables[0].cmap[0x1F000] = "eastwindtile"
|
||||
# the bit 122 and the special bit 57 ('Non Plane 0') are also enabled
|
||||
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9, 57, 122})
|
||||
|
||||
@ -55,6 +62,49 @@ class OS2TableTest(unittest.TestCase):
|
||||
(set(range(123)) - {9, 57, 122}),
|
||||
)
|
||||
|
||||
def test_getCodePageRanges(self):
    """With both fields fully set, every bit 0..63 must be reported."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = 0xFFFFFFFF
    table.ulCodePageRange2 = 0xFFFFFFFF
    bits = table.getCodePageRanges()
    # range(64), not range(63): the two 32-bit fields hold 64 bits, and
    # bit 63 (top bit of ulCodePageRange2) was previously left unchecked.
    for i in range(64):
        self.assertIn(i, bits)
|
||||
|
||||
def test_setCodePageRanges(self):
    """Setting all 64 bits round-trips; out-of-range bits raise ValueError."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = table.ulCodePageRange2 = 0

    everyBit = set(range(64))
    table.setCodePageRanges(everyBit)
    self.assertEqual(table.getCodePageRanges(), everyBit)

    # One value below the valid range, two above it.
    for invalid in (-1, 64, 255):
        with self.assertRaises(ValueError):
            table.setCodePageRanges([invalid])
|
||||
|
||||
def test_recalcCodePageRanges(self):
    """Exercise recalcCodePageRanges with and without 'pruneOnly'."""
    mapping = {ord("A"): "A", ord("Ά"): "Alphatonos", ord("Б"): "Be"}
    font, os2, cmap = self.makeOS2_cmap(mapping)
    os2.setCodePageRanges({0, 2, 9})

    # pruneOnly=True may only clear bits: of the initially set {0, 2, 9},
    # only the CodePage backed by characters in the cmap survives.
    pruned = os2.recalcCodePageRanges(font, pruneOnly=True)
    self.assertEqual(pruned, {2})

    # The default (pruneOnly=False) also enables CodePages that were not
    # set beforehand.
    recalculated = os2.recalcCodePageRanges(font)
    self.assertEqual(recalculated, {2, 3})

    # Adding a Korean character enables CodePage 21 (Korean Johab).
    subtable = cmap.tables[0]
    subtable.cmap[ord("곴")] = "goss"
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3, 21})

    # With an empty cmap, CodePage 0 (Latin 1) is still set as a fallback.
    subtable.cmap = {}
    self.assertEqual(os2.recalcCodePageRanges(font), {0})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
Loading…
x
Reference in New Issue
Block a user