[OS/2] Add recalcCodePageRanges
Ported from the ufo2ft code (which is itself a port of the FontForge code), with some additional functions for parity with recalcUnicodeRanges.
This commit is contained in:
parent
6fa1a76e06
commit
be8ec35934
@ -340,6 +340,45 @@ class table_O_S_2f_2(DefaultTable.DefaultTable):
|
|||||||
self.setUnicodeRanges(bits)
|
self.setUnicodeRanges(bits)
|
||||||
return bits
|
return bits
|
||||||
|
|
||||||
|
def getCodePageRanges(self):
    """Return the set of enabled 'ulCodePageRange*' bit numbers.

    Bits set in ``ulCodePageRange1`` are reported as 0-31; bits set in
    ``ulCodePageRange2`` are reported as 32-63.
    """
    low, high = self.ulCodePageRange1, self.ulCodePageRange2
    enabled = {pos for pos in range(32) if low & (1 << pos)}
    # The second field continues the numbering at bit 32.
    enabled.update(pos + 32 for pos in range(32) if high & (1 << pos))
    return enabled
|
||||||
|
|
||||||
|
def setCodePageRanges(self, bits):
    """Set the 'ulCodePageRange*' fields to the specified 'bits'.

    ``bits`` is an iterable of bit numbers: 0-31 go into
    ``ulCodePageRange1`` and 32-63 into ``ulCodePageRange2``. Both fields
    are overwritten; bits not listed end up cleared.

    Raises:
        ValueError: if any bit number lies outside 0..63. The fields are
            left untouched in that case, because they are only assigned
            after every bit has been validated.
    """
    ul1, ul2 = 0, 0
    for bit in bits:
        if 0 <= bit < 32:
            ul1 |= 1 << bit
        elif 32 <= bit < 64:
            ul2 |= 1 << (bit - 32)
        else:
            # '!r' is the repr conversion; ':r' would be parsed as an
            # (invalid) format spec and hide this message behind an
            # "Unknown format code" error.
            raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
    self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2
|
||||||
|
|
||||||
|
def recalcCodePageRanges(self, ttFont, pruneOnly=False):
    """Recompute the 'ulCodePageRange*' bits from the font's Unicode cmaps.

    Codepoints are gathered from every 'cmap' subtable of ``ttFont`` that
    reports itself as Unicode and handed to ``calcCodePageRanges``. With
    ``pruneOnly=True`` the computed bits are intersected with the bits
    already enabled, so bits can be cleared but never newly set.

    The resulting set is stored via ``setCodePageRanges`` and returned.
    """
    codepoints = set()
    for subtable in ttFont["cmap"].tables:
        if not subtable.isUnicode():
            continue
        codepoints.update(subtable.cmap.keys())

    bits = calcCodePageRanges(codepoints)
    if pruneOnly:
        bits &= self.getCodePageRanges()

    # when no codepage ranges can be enabled, fall back to enabling bit 0
    # (Latin 1) so that the font works in MS Word:
    # https://github.com/googlei18n/fontmake/issues/468
    bits = bits or {0}

    self.setCodePageRanges(bits)
    return bits
|
||||||
|
|
||||||
def recalcAvgCharWidth(self, ttFont):
|
def recalcAvgCharWidth(self, ttFont):
|
||||||
"""Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.
|
"""Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.
|
||||||
|
|
||||||
@ -611,6 +650,92 @@ def intersectUnicodeRanges(unicodes, inverse=False):
|
|||||||
return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
|
return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
|
||||||
|
|
||||||
|
|
||||||
|
def calcCodePageRanges(unicodes):
    """Compute the OS/2 CodePage range bits for a set of Unicode codepoints.

    ``unicodes`` is a set of integer codepoints. Each code page is detected
    by the presence of one marker character, sometimes combined with
    further conditions (basic ASCII coverage, a box-drawing character, the
    radical sign). The return value is the set of bit numbers to enable.

    The heuristics are a translation of the FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158
    """

    def has(char):
        return ord(char) in unicodes

    hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
    hasLineart = has("┤")
    hasRadical = has("√")
    bits = set()

    # Code pages whose marker alone is sufficient.
    for marker, bit in (
        ("ๅ", 16),  # Thai
        ("エ", 17),  # JIS/Japan
        ("ㄅ", 18),  # Chinese: Simplified
        ("ㄱ", 19),  # Korean wansung
        ("央", 20),  # Chinese: Traditional
        ("곴", 21),  # Korean Johab
    ):
        if has(marker):
            bits.add(bit)

    if has("Б"):
        bits.add(2)  # Cyrillic
        if hasLineart and has("Ѕ"):
            bits.add(57)  # IBM Cyrillic
        if hasLineart and has("╜"):
            bits.add(49)  # MS-DOS Russian

    if has("Ά"):
        bits.add(3)  # Greek
        if hasLineart and has("½"):
            bits.add(48)  # IBM Greek
        if hasLineart and hasRadical:
            bits.add(60)  # Greek, former 437 G

    if has("א"):
        bits.add(5)  # Hebrew
        if hasLineart and hasRadical:
            bits.add(53)  # Hebrew

    if has("ر"):
        bits.add(6)  # Arabic
        if hasRadical:
            bits.add(51)  # Arabic
        if hasLineart:
            bits.add(61)  # Arabic; ASMO 708

    # Everything below additionally requires basic ASCII coverage.
    if hasAscii:
        if has("Þ"):
            bits.add(0)  # Latin 1
        if has("Ľ"):
            bits.add(1)  # Latin 2: Eastern Europe
            if hasLineart:
                bits.add(58)  # Latin 2
        if has("İ"):
            bits.add(4)  # Turkish
            if hasLineart:
                bits.add(56)  # IBM turkish
        if has("ŗ"):
            bits.add(7)  # Windows Baltic
            if hasLineart:
                bits.add(59)  # MS-DOS Baltic
        if has("₫"):
            bits.add(8)  # Vietnamese
        if has("♥"):
            bits.add(30)  # OEM Character Set
        # TODO: Symbol bit (31) has a special meaning (check the spec), we
        # need to confirm if setting it for U+F000..U+F0FF is wanted by
        # default; it is deliberately not set here.
        if hasLineart and has("þ"):
            bits.add(54)  # MS-DOS Icelandic
        if has("╚"):
            bits.update((62, 63))  # WE/Latin 1, US
        if hasLineart and hasRadical:
            if has("Å"):
                bits.add(50)  # MS-DOS Nordic
            if has("é"):
                bits.add(52)  # MS-DOS Canadian French
            if has("õ"):
                bits.add(55)  # MS-DOS Portuguese
        if has("‰") and has("∑"):
            bits.add(29)  # Macintosh Character Set (US Roman)

    return bits
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import doctest, sys
|
import doctest, sys
|
||||||
|
|
||||||
|
@ -4,6 +4,18 @@ import unittest
|
|||||||
|
|
||||||
|
|
||||||
class OS2TableTest(unittest.TestCase):
|
class OS2TableTest(unittest.TestCase):
|
||||||
|
@staticmethod
def makeOS2_cmap(mapping):
    """Build a minimal test font with an 'OS/2' table and a 'cmap' table.

    The 'cmap' holds a single format 4 subtable (platformID 3,
    platEncID 1, language 0) whose ``cmap`` dict is ``mapping`` itself,
    not a copy.

    Returns the ``(font, os2, cmap)`` triple.
    """
    font = TTFont()
    os2 = newTable("OS/2")
    font["OS/2"] = os2
    cmap = newTable("cmap")
    font["cmap"] = cmap

    subtable = getTableModule("cmap").CmapSubtable.newSubtable(4)
    subtable.platformID = 3
    subtable.platEncID = 1
    subtable.language = 0
    subtable.cmap = mapping

    cmap.tables = [subtable]
    return font, os2, cmap
|
||||||
|
|
||||||
def test_getUnicodeRanges(self):
|
def test_getUnicodeRanges(self):
|
||||||
table = table_O_S_2f_2()
|
table = table_O_S_2f_2()
|
||||||
table.ulUnicodeRange1 = 0xFFFFFFFF
|
table.ulUnicodeRange1 = 0xFFFFFFFF
|
||||||
@ -27,14 +39,9 @@ class OS2TableTest(unittest.TestCase):
|
|||||||
table.setUnicodeRanges([-1, 127, 255])
|
table.setUnicodeRanges([-1, 127, 255])
|
||||||
|
|
||||||
def test_recalcUnicodeRanges(self):
|
def test_recalcUnicodeRanges(self):
|
||||||
font = TTFont()
|
font, os2, cmap = self.makeOS2_cmap(
|
||||||
font["OS/2"] = os2 = newTable("OS/2")
|
{0x0041: "A", 0x03B1: "alpha", 0x0410: "Acyr"}
|
||||||
font["cmap"] = cmap = newTable("cmap")
|
)
|
||||||
st = getTableModule("cmap").CmapSubtable.newSubtable(4)
|
|
||||||
st.platformID, st.platEncID, st.language = 3, 1, 0
|
|
||||||
st.cmap = {0x0041: "A", 0x03B1: "alpha", 0x0410: "Acyr"}
|
|
||||||
cmap.tables = []
|
|
||||||
cmap.tables.append(st)
|
|
||||||
os2.setUnicodeRanges({0, 1, 9})
|
os2.setUnicodeRanges({0, 1, 9})
|
||||||
# 'pruneOnly' will clear any bits for which there's no intersection:
|
# 'pruneOnly' will clear any bits for which there's no intersection:
|
||||||
# bit 1 ('Latin 1 Supplement'), in this case. However, it won't set
|
# bit 1 ('Latin 1 Supplement'), in this case. However, it won't set
|
||||||
@ -43,7 +50,7 @@ class OS2TableTest(unittest.TestCase):
|
|||||||
# try again with pruneOnly=False: bit 7 is now set.
|
# try again with pruneOnly=False: bit 7 is now set.
|
||||||
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9})
|
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9})
|
||||||
# add a non-BMP char from 'Mahjong Tiles' block (bit 122)
|
# add a non-BMP char from 'Mahjong Tiles' block (bit 122)
|
||||||
st.cmap[0x1F000] = "eastwindtile"
|
cmap.tables[0].cmap[0x1F000] = "eastwindtile"
|
||||||
# the bit 122 and the special bit 57 ('Non Plane 0') are also enabled
|
# the bit 122 and the special bit 57 ('Non Plane 0') are also enabled
|
||||||
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9, 57, 122})
|
self.assertEqual(os2.recalcUnicodeRanges(font), {0, 7, 9, 57, 122})
|
||||||
|
|
||||||
@ -55,6 +62,49 @@ class OS2TableTest(unittest.TestCase):
|
|||||||
(set(range(123)) - {9, 57, 122}),
|
(set(range(123)) - {9, 57, 122}),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_getCodePageRanges(self):
    """With both fields all-ones, every bit from 0 to 63 is reported."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = 0xFFFFFFFF
    table.ulCodePageRange2 = 0xFFFFFFFF
    # Compare against the full set so that the top bit (63) is covered
    # and any out-of-range bit would also be caught.
    self.assertEqual(table.getCodePageRanges(), set(range(64)))
|
||||||
|
|
||||||
|
def test_setCodePageRanges(self):
    """All 64 bits round-trip; bit numbers outside 0..63 are rejected."""
    table = table_O_S_2f_2()
    table.ulCodePageRange1 = table.ulCodePageRange2 = 0

    allBits = set(range(64))
    table.setCodePageRanges(allBits)
    self.assertEqual(table.getCodePageRanges(), allBits)

    # Below the range, just past the top bit, and far past it.
    for invalid in (-1, 64, 255):
        with self.assertRaises(ValueError):
            table.setCodePageRanges([invalid])
|
||||||
|
|
||||||
|
def test_recalcCodePageRanges(self):
    """Exercise pruning, full recalculation and the Latin 1 fallback."""
    mapping = {ord("A"): "A", ord("Ά"): "Alphatonos", ord("Б"): "Be"}
    font, os2, cmap = self.makeOS2_cmap(mapping)
    os2.setCodePageRanges({0, 2, 9})

    # pruneOnly=True only clears CodePages that have no supporting
    # characters in the cmap; it never enables new ones.
    pruned = os2.recalcCodePageRanges(font, pruneOnly=True)
    self.assertEqual(pruned, {2})

    # pruneOnly=False also enables CodePages that were not set before.
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3})

    # A Korean character turns on CodePage 21 (Korean Johab).
    subtable = cmap.tables[0]
    subtable.cmap[ord("곴")] = "goss"
    self.assertEqual(os2.recalcCodePageRanges(font), {2, 3, 21})

    # With an empty cmap, CodePage 0 (Latin 1) is still set.
    subtable.cmap = {}
    self.assertEqual(os2.recalcCodePageRanges(font), {0})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
|
Loading…
x
Reference in New Issue
Block a user