Merge pull request #2326 from daltonmaag/compact-gpos
Add GPOS compaction code in varLib.merger and otlLib.builder + new `fonttools otlLib.optimize` command
This commit is contained in:
commit
4602a9e7ed
@ -1,14 +1,25 @@
|
||||
__all__ = ['popCount']
|
||||
__all__ = ["popCount"]
|
||||
|
||||
|
||||
try:
|
||||
bit_count = int.bit_count
|
||||
except AttributeError:
|
||||
|
||||
def bit_count(v):
|
||||
return bin(v).count('1')
|
||||
return bin(v).count("1")
|
||||
|
||||
|
||||
"""Return number of 1 bits (population count) of the absolute value of an integer.
|
||||
|
||||
See https://docs.python.org/3.10/library/stdtypes.html#int.bit_count
|
||||
"""
|
||||
popCount = bit_count
|
||||
|
||||
|
||||
def bit_indices(v):
|
||||
"""Return list of indices where bits are set, 0 being the index of the least significant bit.
|
||||
|
||||
>>> bit_indices(0b101)
|
||||
[0, 2]
|
||||
"""
|
||||
return [i for i, b in enumerate(bin(v)[::-1]) if b == "1"]
|
||||
|
@ -1,4 +1,5 @@
|
||||
from collections import namedtuple, OrderedDict
|
||||
import os
|
||||
from fontTools.misc.fixedTools import fixedToFloat
|
||||
from fontTools import ttLib
|
||||
from fontTools.ttLib.tables import otTables as ot
|
||||
@ -10,6 +11,7 @@ from fontTools.ttLib.tables.otBase import (
|
||||
)
|
||||
from fontTools.ttLib.tables import otBase
|
||||
from fontTools.feaLib.ast import STATNameStatement
|
||||
from fontTools.otlLib.optimize.gpos import GPOS_COMPACT_MODE_DEFAULT, GPOS_COMPACT_MODE_ENV_KEY, compact_lookup
|
||||
from fontTools.otlLib.error import OpenTypeLibError
|
||||
from functools import reduce
|
||||
import logging
|
||||
@ -1373,7 +1375,17 @@ class PairPosBuilder(LookupBuilder):
|
||||
subtables.extend(buildPairPosGlyphs(self.glyphPairs, self.glyphMap))
|
||||
for key in sorted(builders.keys()):
|
||||
subtables.extend(builders[key].subtables())
|
||||
return self.buildLookup_(subtables)
|
||||
lookup = self.buildLookup_(subtables)
|
||||
|
||||
# Compact the lookup
|
||||
# This is a good moment to do it because the compaction should create
|
||||
# smaller subtables, which may prevent overflows from happening.
|
||||
mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
|
||||
if mode and mode != "0":
|
||||
log.info("Compacting GPOS...")
|
||||
compact_lookup(self.font, mode, lookup)
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
class SinglePosBuilder(LookupBuilder):
|
||||
|
68
Lib/fontTools/otlLib/optimize/__init__.py
Normal file
68
Lib/fontTools/otlLib/optimize/__init__.py
Normal file
@ -0,0 +1,68 @@
|
||||
from argparse import RawTextHelpFormatter
|
||||
from textwrap import dedent
|
||||
|
||||
from fontTools.ttLib import TTFont
|
||||
from fontTools.otlLib.optimize.gpos import compact, GPOS_COMPACT_MODE_DEFAULT
|
||||
|
||||
def main(args=None):
|
||||
"""Optimize the layout tables of an existing font."""
|
||||
from argparse import ArgumentParser
|
||||
from fontTools import configLogger
|
||||
|
||||
parser = ArgumentParser(prog="otlLib.optimize", description=main.__doc__, formatter_class=RawTextHelpFormatter)
|
||||
parser.add_argument("font")
|
||||
parser.add_argument(
|
||||
"-o", metavar="OUTPUTFILE", dest="outfile", default=None, help="output file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gpos-compact-mode",
|
||||
help=dedent(
|
||||
f"""\
|
||||
GPOS Lookup type 2 (PairPos) compaction mode:
|
||||
0 = do not attempt to compact PairPos lookups;
|
||||
1 to 8 = create at most 1 to 8 new subtables for each existing
|
||||
subtable, provided that it would yield a 50%% file size saving;
|
||||
9 = create as many new subtables as needed to yield a file size saving.
|
||||
Default: {GPOS_COMPACT_MODE_DEFAULT}.
|
||||
|
||||
This compaction aims to save file size, by splitting large class
|
||||
kerning subtables (Format 2) that contain many zero values into
|
||||
smaller and denser subtables. It's a trade-off between the overhead
|
||||
of several subtables versus the sparseness of one big subtable.
|
||||
|
||||
See the pull request: https://github.com/fonttools/fonttools/pull/2326
|
||||
"""
|
||||
),
|
||||
default=int(GPOS_COMPACT_MODE_DEFAULT),
|
||||
choices=list(range(10)),
|
||||
type=int,
|
||||
)
|
||||
logging_group = parser.add_mutually_exclusive_group(required=False)
|
||||
logging_group.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="Run more verbosely."
|
||||
)
|
||||
logging_group.add_argument(
|
||||
"-q", "--quiet", action="store_true", help="Turn verbosity off."
|
||||
)
|
||||
options = parser.parse_args(args)
|
||||
|
||||
configLogger(
|
||||
level=("DEBUG" if options.verbose else "ERROR" if options.quiet else "INFO")
|
||||
)
|
||||
|
||||
font = TTFont(options.font)
|
||||
# TODO: switch everything to have type(mode) = int when using the Config class
|
||||
compact(font, str(options.gpos_compact_mode))
|
||||
font.save(options.outfile or options.font)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
sys.exit(main())
|
||||
import doctest
|
||||
|
||||
sys.exit(doctest.testmod().failed)
|
||||
|
6
Lib/fontTools/otlLib/optimize/__main__.py
Normal file
6
Lib/fontTools/otlLib/optimize/__main__.py
Normal file
@ -0,0 +1,6 @@
|
||||
import sys
|
||||
from fontTools.otlLib.optimize import main
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
439
Lib/fontTools/otlLib/optimize/gpos.py
Normal file
439
Lib/fontTools/otlLib/optimize/gpos.py
Normal file
@ -0,0 +1,439 @@
|
||||
import logging
|
||||
from collections import defaultdict, namedtuple
|
||||
from functools import reduce
|
||||
from itertools import chain
|
||||
from math import log2
|
||||
from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
|
||||
|
||||
from fontTools.misc.intTools import bit_count, bit_indices
|
||||
from fontTools.ttLib import TTFont
|
||||
from fontTools.ttLib.tables import otBase, otTables
|
||||
|
||||
# NOTE: activating this optimization via the environment variable is
|
||||
# experimental and may not be supported once an alternative mechanism
|
||||
# is in place. See: https://github.com/fonttools/fonttools/issues/2349
|
||||
GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
|
||||
GPOS_COMPACT_MODE_DEFAULT = "0"
|
||||
|
||||
log = logging.getLogger("fontTools.otlLib.optimize.gpos")
|
||||
|
||||
|
||||
def compact(font: TTFont, mode: str) -> TTFont:
|
||||
# Ideal plan:
|
||||
# 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
|
||||
# https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
|
||||
# 2. Extract glyph-glyph kerning and class-kerning from all present subtables
|
||||
# 3. Regroup into different subtable arrangements
|
||||
# 4. Put back into the lookup
|
||||
#
|
||||
# Actual implementation:
|
||||
# 2. Only class kerning is optimized currently
|
||||
# 3. If the input kerning is already in several subtables, the subtables
|
||||
# are not grouped together first; instead each subtable is treated
|
||||
# independently, so currently this step is:
|
||||
# Split existing subtables into more smaller subtables
|
||||
gpos = font["GPOS"]
|
||||
for lookup in gpos.table.LookupList.Lookup:
|
||||
if lookup.LookupType == 2:
|
||||
compact_lookup(font, mode, lookup)
|
||||
elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
|
||||
compact_ext_lookup(font, mode, lookup)
|
||||
return font
|
||||
|
||||
|
||||
def compact_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None:
|
||||
new_subtables = compact_pair_pos(font, mode, lookup.SubTable)
|
||||
lookup.SubTable = new_subtables
|
||||
lookup.SubTableCount = len(new_subtables)
|
||||
|
||||
|
||||
def compact_ext_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None:
|
||||
new_subtables = compact_pair_pos(
|
||||
font, mode, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
|
||||
)
|
||||
new_ext_subtables = []
|
||||
for subtable in new_subtables:
|
||||
ext_subtable = otTables.ExtensionPos()
|
||||
ext_subtable.Format = 1
|
||||
ext_subtable.ExtSubTable = subtable
|
||||
new_ext_subtables.append(ext_subtable)
|
||||
lookup.SubTable = new_ext_subtables
|
||||
lookup.SubTableCount = len(new_ext_subtables)
|
||||
|
||||
|
||||
def compact_pair_pos(
|
||||
font: TTFont, mode: str, subtables: Sequence[otTables.PairPos]
|
||||
) -> Sequence[otTables.PairPos]:
|
||||
new_subtables = []
|
||||
for subtable in subtables:
|
||||
if subtable.Format == 1:
|
||||
# Not doing anything to Format 1 (yet?)
|
||||
new_subtables.append(subtable)
|
||||
elif subtable.Format == 2:
|
||||
new_subtables.extend(compact_class_pairs(font, mode, subtable))
|
||||
return new_subtables
|
||||
|
||||
|
||||
def compact_class_pairs(
|
||||
font: TTFont, mode: str, subtable: otTables.PairPos
|
||||
) -> List[otTables.PairPos]:
|
||||
from fontTools.otlLib.builder import buildPairPosClassesSubtable
|
||||
|
||||
subtables = []
|
||||
classes1: DefaultDict[int, List[str]] = defaultdict(list)
|
||||
for g in subtable.Coverage.glyphs:
|
||||
classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
|
||||
classes2: DefaultDict[int, List[str]] = defaultdict(list)
|
||||
for g, i in subtable.ClassDef2.classDefs.items():
|
||||
classes2[i].append(g)
|
||||
all_pairs = {}
|
||||
for i, class1 in enumerate(subtable.Class1Record):
|
||||
for j, class2 in enumerate(class1.Class2Record):
|
||||
if is_really_zero(class2):
|
||||
continue
|
||||
all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
|
||||
getattr(class2, "Value1", None),
|
||||
getattr(class2, "Value2", None),
|
||||
)
|
||||
|
||||
if len(mode) == 1 and mode in "123456789":
|
||||
grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(
|
||||
font, all_pairs, int(mode)
|
||||
)
|
||||
for pairs in grouped_pairs:
|
||||
subtables.append(
|
||||
buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap())
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={mode}")
|
||||
return subtables
|
||||
|
||||
|
||||
def is_really_zero(class2: otTables.Class2Record) -> bool:
|
||||
v1 = getattr(class2, "Value1", None)
|
||||
v2 = getattr(class2, "Value2", None)
|
||||
return (v1 is None or v1.getEffectiveFormat() == 0) and (
|
||||
v2 is None or v2.getEffectiveFormat() == 0
|
||||
)
|
||||
|
||||
|
||||
Pairs = Dict[
|
||||
Tuple[Tuple[str, ...], Tuple[str, ...]],
|
||||
Tuple[otBase.ValueRecord, otBase.ValueRecord],
|
||||
]
|
||||
|
||||
# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
|
||||
def _getClassRanges(glyphIDs: Iterable[int]):
|
||||
glyphIDs = sorted(glyphIDs)
|
||||
last = glyphIDs[0]
|
||||
ranges = [[last]]
|
||||
for glyphID in glyphIDs[1:]:
|
||||
if glyphID != last + 1:
|
||||
ranges[-1].append(last)
|
||||
ranges.append([glyphID])
|
||||
last = glyphID
|
||||
ranges[-1].append(last)
|
||||
return ranges, glyphIDs[0], glyphIDs[-1]
|
||||
|
||||
|
||||
# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
|
||||
def _classDef_bytes(
|
||||
class_data: List[Tuple[List[Tuple[int, int]], int, int]],
|
||||
class_ids: List[int],
|
||||
coverage=False,
|
||||
):
|
||||
if not class_ids:
|
||||
return 0
|
||||
first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
|
||||
range_count = len(first_ranges)
|
||||
for i in class_ids[1:]:
|
||||
data = class_data[i]
|
||||
range_count += len(data[0])
|
||||
min_glyph_id = min(min_glyph_id, data[1])
|
||||
max_glyph_id = max(max_glyph_id, data[2])
|
||||
glyphCount = max_glyph_id - min_glyph_id + 1
|
||||
# https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
|
||||
format1_bytes = 6 + glyphCount * 2
|
||||
# https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
|
||||
format2_bytes = 4 + range_count * 6
|
||||
return min(format1_bytes, format2_bytes)
|
||||
|
||||
|
||||
ClusteringContext = namedtuple(
|
||||
"ClusteringContext",
|
||||
[
|
||||
"lines",
|
||||
"all_class1",
|
||||
"all_class1_data",
|
||||
"all_class2_data",
|
||||
"valueFormat1_bytes",
|
||||
"valueFormat2_bytes",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
class Cluster:
|
||||
# TODO(Python 3.7): Turn this into a dataclass
|
||||
# ctx: ClusteringContext
|
||||
# indices: int
|
||||
# Caches
|
||||
# TODO(Python 3.8): use functools.cached_property instead of the
|
||||
# manually cached properties, and remove the cache fields listed below.
|
||||
# _indices: Optional[List[int]] = None
|
||||
# _column_indices: Optional[List[int]] = None
|
||||
# _cost: Optional[int] = None
|
||||
|
||||
__slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"
|
||||
|
||||
def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
|
||||
self.ctx = ctx
|
||||
self.indices_bitmask = indices_bitmask
|
||||
self._indices = None
|
||||
self._column_indices = None
|
||||
self._cost = None
|
||||
|
||||
@property
|
||||
def indices(self):
|
||||
if self._indices is None:
|
||||
self._indices = bit_indices(self.indices_bitmask)
|
||||
return self._indices
|
||||
|
||||
@property
|
||||
def column_indices(self):
|
||||
if self._column_indices is None:
|
||||
# Indices of columns that have a 1 in at least 1 line
|
||||
# => binary OR all the lines
|
||||
bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
|
||||
self._column_indices = bit_indices(bitmask)
|
||||
return self._column_indices
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
# Add 1 because Class2=0 cannot be used but needs to be encoded.
|
||||
return len(self.column_indices) + 1
|
||||
|
||||
@property
|
||||
def cost(self):
|
||||
if self._cost is None:
|
||||
self._cost = (
|
||||
# 2 bytes to store the offset to this subtable in the Lookup table above
|
||||
2
|
||||
# Contents of the subtable
|
||||
# From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
|
||||
# uint16 posFormat Format identifier: format = 2
|
||||
+ 2
|
||||
# Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable.
|
||||
+ 2
|
||||
+ self.coverage_bytes
|
||||
# uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero).
|
||||
+ 2
|
||||
# uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero).
|
||||
+ 2
|
||||
# Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
|
||||
+ 2
|
||||
+ self.classDef1_bytes
|
||||
# Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
|
||||
+ 2
|
||||
+ self.classDef2_bytes
|
||||
# uint16 class1Count Number of classes in classDef1 table — includes Class 0.
|
||||
+ 2
|
||||
# uint16 class2Count Number of classes in classDef2 table — includes Class 0.
|
||||
+ 2
|
||||
# Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1.
|
||||
+ (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
|
||||
* len(self.indices)
|
||||
* self.width
|
||||
)
|
||||
return self._cost
|
||||
|
||||
@property
|
||||
def coverage_bytes(self):
|
||||
format1_bytes = (
|
||||
# From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
|
||||
# uint16 coverageFormat Format identifier — format = 1
|
||||
# uint16 glyphCount Number of glyphs in the glyph array
|
||||
4
|
||||
# uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order
|
||||
+ sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
|
||||
)
|
||||
ranges = sorted(
|
||||
chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
|
||||
)
|
||||
merged_range_count = 0
|
||||
last = None
|
||||
for (start, end) in ranges:
|
||||
if last is not None and start != last + 1:
|
||||
merged_range_count += 1
|
||||
last = end
|
||||
format2_bytes = (
|
||||
# From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
|
||||
# uint16 coverageFormat Format identifier — format = 2
|
||||
# uint16 rangeCount Number of RangeRecords
|
||||
4
|
||||
# RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID.
|
||||
# uint16 startGlyphID First glyph ID in the range
|
||||
# uint16 endGlyphID Last glyph ID in the range
|
||||
# uint16 startCoverageIndex Coverage Index of first glyph ID in range
|
||||
+ merged_range_count * 6
|
||||
)
|
||||
return min(format1_bytes, format2_bytes)
|
||||
|
||||
@property
|
||||
def classDef1_bytes(self):
|
||||
# We can skip encoding one of the Class1 definitions, and use
|
||||
# Class1=0 to represent it instead, because Class1 is gated by the
|
||||
# Coverage definition. Use Class1=0 for the highest byte savings.
|
||||
# Going through all options takes too long, pick the biggest class
|
||||
# = what happens in otlLib.builder.ClassDefBuilder.classes()
|
||||
biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
|
||||
return _classDef_bytes(
|
||||
self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
|
||||
)
|
||||
|
||||
@property
|
||||
def classDef2_bytes(self):
|
||||
# All Class2 need to be encoded because we can't use Class2=0
|
||||
return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
|
||||
|
||||
|
||||
def cluster_pairs_by_class2_coverage_custom_cost(
|
||||
font: TTFont,
|
||||
pairs: Pairs,
|
||||
compression: int = 5,
|
||||
) -> List[Pairs]:
|
||||
if not pairs:
|
||||
# The subtable was actually empty?
|
||||
return [pairs]
|
||||
|
||||
# Sorted for reproducibility/determinism
|
||||
all_class1 = sorted(set(pair[0] for pair in pairs))
|
||||
all_class2 = sorted(set(pair[1] for pair in pairs))
|
||||
|
||||
# Use Python's big ints for binary vectors representing each line
|
||||
lines = [
|
||||
sum(
|
||||
1 << i if (class1, class2) in pairs else 0
|
||||
for i, class2 in enumerate(all_class2)
|
||||
)
|
||||
for class1 in all_class1
|
||||
]
|
||||
|
||||
# Map glyph names to ids and work with ints throughout for ClassDef formats
|
||||
name_to_id = font.getReverseGlyphMap()
|
||||
# Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
|
||||
all_class1_data = [
|
||||
_getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
|
||||
]
|
||||
all_class2_data = [
|
||||
_getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
|
||||
]
|
||||
|
||||
format1 = 0
|
||||
format2 = 0
|
||||
for pair, value in pairs.items():
|
||||
format1 |= value[0].getEffectiveFormat() if value[0] else 0
|
||||
format2 |= value[1].getEffectiveFormat() if value[1] else 0
|
||||
valueFormat1_bytes = bit_count(format1) * 2
|
||||
valueFormat2_bytes = bit_count(format2) * 2
|
||||
|
||||
ctx = ClusteringContext(
|
||||
lines,
|
||||
all_class1,
|
||||
all_class1_data,
|
||||
all_class2_data,
|
||||
valueFormat1_bytes,
|
||||
valueFormat2_bytes,
|
||||
)
|
||||
|
||||
cluster_cache: Dict[int, Cluster] = {}
|
||||
|
||||
def make_cluster(indices: int) -> Cluster:
|
||||
cluster = cluster_cache.get(indices, None)
|
||||
if cluster is not None:
|
||||
return cluster
|
||||
cluster = Cluster(ctx, indices)
|
||||
cluster_cache[indices] = cluster
|
||||
return cluster
|
||||
|
||||
def merge(cluster: Cluster, other: Cluster) -> Cluster:
|
||||
return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
|
||||
|
||||
# Agglomerative clustering by hand, checking the cost gain of the new
|
||||
# cluster against the previously separate clusters
|
||||
# Start with 1 cluster per line
|
||||
# cluster = set of lines = new subtable
|
||||
clusters = [make_cluster(1 << i) for i in range(len(lines))]
|
||||
|
||||
# Cost of 1 cluster with everything
|
||||
# `(1 << len) - 1` gives a bitmask full of 1's of length `len`
|
||||
cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
|
||||
log.debug(f" len(clusters) = {len(clusters)}")
|
||||
|
||||
while len(clusters) > 1:
|
||||
lowest_cost_change = None
|
||||
best_cluster_index = None
|
||||
best_other_index = None
|
||||
best_merged = None
|
||||
for i, cluster in enumerate(clusters):
|
||||
for j, other in enumerate(clusters[i + 1 :]):
|
||||
merged = merge(cluster, other)
|
||||
cost_change = merged.cost - cluster.cost - other.cost
|
||||
if lowest_cost_change is None or cost_change < lowest_cost_change:
|
||||
lowest_cost_change = cost_change
|
||||
best_cluster_index = i
|
||||
best_other_index = i + 1 + j
|
||||
best_merged = merged
|
||||
assert lowest_cost_change is not None
|
||||
assert best_cluster_index is not None
|
||||
assert best_other_index is not None
|
||||
assert best_merged is not None
|
||||
|
||||
# If the best merge we found is still taking down the file size, then
|
||||
# there's no question: we must do it, because it's beneficial in both
|
||||
# ways (lower file size and lower number of subtables). However, if the
|
||||
# best merge we found is not reducing file size anymore, then we need to
|
||||
# look at the other stop criteria = the compression factor.
|
||||
if lowest_cost_change > 0:
|
||||
# Stop critera: check whether we should keep merging.
|
||||
# Compute size reduction brought by splitting
|
||||
cost_after_splitting = sum(c.cost for c in clusters)
|
||||
# size_reduction so that after = before * (1 - size_reduction)
|
||||
# E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
|
||||
size_reduction = 1 - cost_after_splitting / cost_before_splitting
|
||||
|
||||
# Force more merging by taking into account the compression number.
|
||||
# Target behaviour: compression number = 1 to 9, default 5 like gzip
|
||||
# - 1 = accept to add 1 subtable to reduce size by 50%
|
||||
# - 5 = accept to add 5 subtables to reduce size by 50%
|
||||
# See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
|
||||
# Given the size reduction we have achieved so far, compute how many
|
||||
# new subtables are acceptable.
|
||||
max_new_subtables = -log2(1 - size_reduction) * compression
|
||||
log.debug(
|
||||
f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}",
|
||||
)
|
||||
if compression == 9:
|
||||
# Override level 9 to mean: create any number of subtables
|
||||
max_new_subtables = len(clusters)
|
||||
|
||||
# If we have managed to take the number of new subtables below the
|
||||
# threshold, then we can stop.
|
||||
if len(clusters) <= max_new_subtables + 1:
|
||||
break
|
||||
|
||||
# No reason to stop yet, do the merge and move on to the next.
|
||||
del clusters[best_other_index]
|
||||
clusters[best_cluster_index] = best_merged
|
||||
|
||||
# All clusters are final; turn bitmasks back into the "Pairs" format
|
||||
pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
|
||||
for pair, values in pairs.items():
|
||||
pairs_by_class1[pair[0]][pair] = values
|
||||
pairs_groups: List[Pairs] = []
|
||||
for cluster in clusters:
|
||||
pairs_group: Pairs = dict()
|
||||
for i in cluster.indices:
|
||||
class1 = all_class1[i]
|
||||
pairs_group.update(pairs_by_class1[class1])
|
||||
pairs_groups.append(pairs_group)
|
||||
return pairs_groups
|
@ -1,8 +1,10 @@
|
||||
"""
|
||||
Merge OpenType Layout tables (GDEF / GPOS / GSUB).
|
||||
"""
|
||||
import os
|
||||
import copy
|
||||
from operator import ior
|
||||
import logging
|
||||
from fontTools.misc import classifyTools
|
||||
from fontTools.misc.roundTools import otRound
|
||||
from fontTools.ttLib.tables import otTables as ot
|
||||
@ -13,6 +15,13 @@ from fontTools.varLib.models import nonNone, allNone, allEqual, allEqualTo
|
||||
from fontTools.varLib.varStore import VarStoreInstancer
|
||||
from functools import reduce
|
||||
from fontTools.otlLib.builder import buildSinglePos
|
||||
from fontTools.otlLib.optimize.gpos import (
|
||||
compact_pair_pos,
|
||||
GPOS_COMPACT_MODE_DEFAULT,
|
||||
GPOS_COMPACT_MODE_ENV_KEY,
|
||||
)
|
||||
|
||||
log = logging.getLogger("fontTools.varLib.merger")
|
||||
|
||||
from .errors import (
|
||||
ShouldBeConstant,
|
||||
@ -837,6 +846,15 @@ def merge(merger, self, lst):
|
||||
self.SubTable.pop(-1)
|
||||
self.SubTableCount -= 1
|
||||
|
||||
# Compact the merged subtables
|
||||
# This is a good moment to do it because the compaction should create
|
||||
# smaller subtables, which may prevent overflows from happening.
|
||||
mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
|
||||
if mode and mode != "0":
|
||||
log.info("Compacting GPOS...")
|
||||
self.SubTable = compact_pair_pos(merger.font, mode, self.SubTable)
|
||||
self.SubTableCount = len(self.SubTable)
|
||||
|
||||
elif isSinglePos and flattened:
|
||||
singlePosTable = self.SubTable[0]
|
||||
glyphs = singlePosTable.Coverage.glyphs
|
||||
@ -851,7 +869,6 @@ def merge(merger, self, lst):
|
||||
|
||||
del merger.lookup_subtables
|
||||
|
||||
|
||||
#
|
||||
# InstancerMerger
|
||||
#
|
||||
|
15
NEWS.rst
15
NEWS.rst
@ -15,6 +15,11 @@
|
||||
- [post] Fixed parsing ``post`` table format 2.0 when it contains extra garbage
|
||||
at the end of the stringData array (#2314).
|
||||
- [subset] drop empty features unless 'size' with FeatureParams table (#2324).
|
||||
- [otlLib] Added ``otlLib.optimize`` module; added GPOS compaction algorithm.
|
||||
The compaction can be run on existing fonts with ``fonttools otlLib.optimize``
|
||||
or using the snippet ``compact_gpos.py``. There's experimental support for
|
||||
compacting fonts at compilation time using an environment variable, but that
|
||||
might be removed later (#2326).
|
||||
|
||||
4.24.4 (released 2021-05-25)
|
||||
----------------------------
|
||||
@ -498,7 +503,7 @@
|
||||
instance, correctly map the value forward.
|
||||
- [varLib] The avar table can now contain mapping output values that are greater than
|
||||
OR EQUAL to the preceeding value, as the avar specification allows this.
|
||||
- [varLib] The errors of the module are now ordered hierarchically below VarLibError.
|
||||
- [varLib] The errors of the module are now ordered hierarchically below VarLibError.
|
||||
See #1821.
|
||||
|
||||
4.3.0 (released 2020-02-03)
|
||||
@ -792,13 +797,13 @@
|
||||
- [mutator] Set ``OVERLAP_SIMPLE`` and ``OVERLAP_COMPOUND`` glyf flags by
|
||||
default in ``instantiateVariableFont``. Added ``--no-overlap`` cli option
|
||||
to disable this (#1518).
|
||||
- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517).
|
||||
- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517).
|
||||
Fixed subsetting an ``HVAR`` table that has an ``AdvanceWidthMap`` when the
|
||||
option ``--retain-gids`` is used.
|
||||
- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511).
|
||||
Fixed double indentation of ``subtable`` statement (#1512).
|
||||
- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511).
|
||||
Fixed double indentation of ``subtable`` statement (#1512).
|
||||
Added support for ``subtable`` statement in more places than just PairPos
|
||||
lookups (#1520).
|
||||
lookups (#1520).
|
||||
Handle lookupflag 0 and lookupflag without a value (#1540).
|
||||
- [varLib] In ``load_designspace``, provide a default English name for the
|
||||
``ital`` axis tag.
|
||||
|
144
Snippets/compact_gpos.py
Normal file
144
Snippets/compact_gpos.py
Normal file
@ -0,0 +1,144 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Sample script to use the otlLib.optimize.gpos functions to compact GPOS tables
|
||||
of existing fonts. This script takes one or more TTF files as arguments and
|
||||
will create compacted copies of the fonts using all available modes of the GPOS
|
||||
compaction algorithm. For each copy, it will measure the new size of the GPOS
|
||||
table and also the new size of the font in WOFF2 format. All results will be
|
||||
printed to stdout in CSV format, so the savings provided by the algorithm in
|
||||
each mode can be inspected.
|
||||
|
||||
This was initially made to debug the algorithm but can also be used to choose
|
||||
a mode value for a specific font (trade-off between bytes saved in TTF format
|
||||
vs more bytes in WOFF2 format and more subtables).
|
||||
|
||||
Run:
|
||||
|
||||
python Snippets/compact_gpos.py MyFont.ttf > results.csv
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
import csv
|
||||
import time
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, List, Optional, Sequence, Tuple
|
||||
|
||||
from fontTools.ttLib import TTFont
|
||||
from fontTools.otlLib.optimize import compact
|
||||
|
||||
MODES = [str(c) for c in range(1, 10)]
|
||||
|
||||
|
||||
def main(args: Optional[List[str]] = None):
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("fonts", type=Path, nargs="+", help="Path to TTFs.")
|
||||
parsed_args = parser.parse_args(args)
|
||||
|
||||
runtimes = defaultdict(list)
|
||||
rows = []
|
||||
font_path: Path
|
||||
for font_path in parsed_args.fonts:
|
||||
font = TTFont(font_path)
|
||||
if "GPOS" not in font:
|
||||
print(f"No GPOS in {font_path.name}, skipping.", file=sys.stderr)
|
||||
continue
|
||||
size_orig = len(font.getTableData("GPOS")) / 1024
|
||||
print(f"Measuring {font_path.name}...", file=sys.stderr)
|
||||
|
||||
fonts = {}
|
||||
font_paths = {}
|
||||
sizes = {}
|
||||
for mode in MODES:
|
||||
print(f" Running mode={mode}", file=sys.stderr)
|
||||
fonts[mode] = TTFont(font_path)
|
||||
before = time.perf_counter()
|
||||
compact(fonts[mode], mode=str(mode))
|
||||
runtimes[mode].append(time.perf_counter() - before)
|
||||
font_paths[mode] = (
|
||||
font_path.parent
|
||||
/ "compact"
|
||||
/ (font_path.stem + f"_{mode}" + font_path.suffix)
|
||||
)
|
||||
font_paths[mode].parent.mkdir(parents=True, exist_ok=True)
|
||||
fonts[mode].save(font_paths[mode])
|
||||
fonts[mode] = TTFont(font_paths[mode])
|
||||
sizes[mode] = len(fonts[mode].getTableData("GPOS")) / 1024
|
||||
|
||||
print(f" Runtimes:", file=sys.stderr)
|
||||
for mode, times in runtimes.items():
|
||||
print(
|
||||
f" {mode:10} {' '.join(f'{t:5.2f}' for t in times)}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Bonus: measure WOFF2 file sizes.
|
||||
print(f" Measuring WOFF2 sizes", file=sys.stderr)
|
||||
size_woff_orig = woff_size(font, font_path) / 1024
|
||||
sizes_woff = {
|
||||
mode: woff_size(fonts[mode], font_paths[mode]) / 1024 for mode in MODES
|
||||
}
|
||||
|
||||
rows.append(
|
||||
(
|
||||
font_path.name,
|
||||
size_orig,
|
||||
size_woff_orig,
|
||||
*flatten(
|
||||
(
|
||||
sizes[mode],
|
||||
pct(sizes[mode], size_orig),
|
||||
sizes_woff[mode],
|
||||
pct(sizes_woff[mode], size_woff_orig),
|
||||
)
|
||||
for mode in MODES
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
write_csv(rows)
|
||||
|
||||
|
||||
def woff_size(font: TTFont, path: Path) -> int:
|
||||
font.flavor = "woff2"
|
||||
woff_path = path.with_suffix(".woff2")
|
||||
font.save(woff_path)
|
||||
return woff_path.stat().st_size
|
||||
|
||||
|
||||
def write_csv(rows: List[Tuple[Any]]) -> None:
|
||||
sys.stdout.reconfigure(encoding="utf-8")
|
||||
sys.stdout.write("\uFEFF")
|
||||
writer = csv.writer(sys.stdout, lineterminator="\n")
|
||||
writer.writerow(
|
||||
[
|
||||
"File",
|
||||
"Original GPOS Size",
|
||||
"Original WOFF2 Size",
|
||||
*flatten(
|
||||
(
|
||||
f"mode={mode}",
|
||||
f"Change {mode}",
|
||||
f"mode={mode} WOFF2 Size",
|
||||
f"Change {mode} WOFF2 Size",
|
||||
)
|
||||
for mode in MODES
|
||||
),
|
||||
]
|
||||
)
|
||||
for row in rows:
|
||||
writer.writerow(row)
|
||||
|
||||
|
||||
def pct(new: float, old: float) -> float:
|
||||
return -(1 - (new / old))
|
||||
|
||||
|
||||
def flatten(seq_seq: Iterable[Iterable[Any]]) -> List[Any]:
|
||||
return [thing for seq in seq_seq for thing in seq]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
175
Tests/otlLib/optimize_test.py
Normal file
175
Tests/otlLib/optimize_test.py
Normal file
@ -0,0 +1,175 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from subprocess import run
|
||||
import contextlib
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
from fontTools.ttLib import TTFont
|
||||
|
||||
import pytest
|
||||
|
||||
from fontTools.feaLib.builder import addOpenTypeFeaturesFromString
|
||||
from fontTools.fontBuilder import FontBuilder
|
||||
|
||||
from fontTools.ttLib.tables.otBase import OTTableWriter, ValueRecord
|
||||
|
||||
|
||||
def test_main(tmpdir: Path):
|
||||
"""Check that calling the main function on an input TTF works."""
|
||||
glyphs = ".notdef space A Aacute B D".split()
|
||||
features = """
|
||||
@A = [A Aacute];
|
||||
@B = [B D];
|
||||
feature kern {
|
||||
pos @A @B -50;
|
||||
} kern;
|
||||
"""
|
||||
fb = FontBuilder(1000)
|
||||
fb.setupGlyphOrder(glyphs)
|
||||
addOpenTypeFeaturesFromString(fb.font, features)
|
||||
input = tmpdir / "in.ttf"
|
||||
fb.save(str(input))
|
||||
output = tmpdir / "out.ttf"
|
||||
run(
|
||||
[
|
||||
"fonttools",
|
||||
"otlLib.optimize",
|
||||
"--gpos-compact-mode",
|
||||
"5",
|
||||
str(input),
|
||||
"-o",
|
||||
str(output),
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
assert output.exists()
|
||||
|
||||
|
||||
# Copy-pasted from https://stackoverflow.com/questions/2059482/python-temporarily-modify-the-current-processs-environment
|
||||
# TODO: remove when moving to the Config class
|
||||
@contextlib.contextmanager
|
||||
def set_env(**environ):
|
||||
"""
|
||||
Temporarily set the process environment variables.
|
||||
|
||||
>>> with set_env(PLUGINS_DIR=u'test/plugins'):
|
||||
... "PLUGINS_DIR" in os.environ
|
||||
True
|
||||
|
||||
>>> "PLUGINS_DIR" in os.environ
|
||||
False
|
||||
|
||||
:type environ: dict[str, unicode]
|
||||
:param environ: Environment variables to set
|
||||
"""
|
||||
old_environ = dict(os.environ)
|
||||
os.environ.update(environ)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
os.environ.clear()
|
||||
os.environ.update(old_environ)
|
||||
|
||||
|
||||
def count_pairpos_subtables(font: TTFont) -> int:
|
||||
subtables = 0
|
||||
for lookup in font["GPOS"].table.LookupList.Lookup:
|
||||
if lookup.LookupType == 2:
|
||||
subtables += len(lookup.SubTable)
|
||||
elif lookup.LookupType == 9:
|
||||
for subtable in lookup.SubTable:
|
||||
if subtable.ExtensionLookupType == 2:
|
||||
subtables += 1
|
||||
return subtables
|
||||
|
||||
|
||||
def count_pairpos_bytes(font: TTFont) -> int:
|
||||
bytes = 0
|
||||
gpos = font["GPOS"]
|
||||
for lookup in font["GPOS"].table.LookupList.Lookup:
|
||||
if lookup.LookupType == 2:
|
||||
w = OTTableWriter(tableTag=gpos.tableTag)
|
||||
lookup.compile(w, font)
|
||||
bytes += len(w.getAllData())
|
||||
elif lookup.LookupType == 9:
|
||||
if any(subtable.ExtensionLookupType == 2 for subtable in lookup.SubTable):
|
||||
w = OTTableWriter(tableTag=gpos.tableTag)
|
||||
lookup.compile(w, font)
|
||||
bytes += len(w.getAllData())
|
||||
return bytes
|
||||
|
||||
|
||||
def get_kerning_by_blocks(blocks: List[Tuple[int, int]]) -> Tuple[List[str], str]:
|
||||
"""Generate a highly compressible font by generating a bunch of rectangular
|
||||
blocks on the diagonal that can easily be sliced into subtables.
|
||||
|
||||
Returns the list of glyphs and feature code of the font.
|
||||
"""
|
||||
value = 0
|
||||
glyphs: List[str] = []
|
||||
rules = []
|
||||
# Each block is like a script in a multi-script font
|
||||
for script, (width, height) in enumerate(blocks):
|
||||
glyphs.extend(f"g_{script}_{i}" for i in range(max(width, height)))
|
||||
for l in range(height):
|
||||
for r in range(width):
|
||||
value += 1
|
||||
rules.append((f"g_{script}_{l}", f"g_{script}_{r}", value))
|
||||
classes = "\n".join([f"@{g} = [{g}];" for g in glyphs])
|
||||
statements = "\n".join([f"pos @{l} @{r} {v};" for (l, r, v) in rules])
|
||||
features = f"""
|
||||
{classes}
|
||||
feature kern {{
|
||||
{statements}
|
||||
}} kern;
|
||||
"""
|
||||
return glyphs, features
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("blocks", "mode", "expected_subtables", "expected_bytes"),
|
||||
[
|
||||
# Mode = 0 = no optimization leads to 650 bytes of GPOS
|
||||
([(15, 3), (2, 10)], None, 1, 602),
|
||||
# Optimization level 1 recognizes the 2 blocks and splits into 2
|
||||
# subtables = adds 1 subtable leading to a size reduction of
|
||||
# (602-298)/602 = 50%
|
||||
([(15, 3), (2, 10)], 1, 2, 298),
|
||||
# On a bigger block configuration, we see that mode=5 doesn't create
|
||||
# as many subtables as it could, because of the stop criteria
|
||||
([(4, 4) for _ in range(20)], 5, 14, 2042),
|
||||
# while level=9 creates as many subtables as there were blocks on the
|
||||
# diagonal and yields a better saving
|
||||
([(4, 4) for _ in range(20)], 9, 20, 1886),
|
||||
# On a fully occupied kerning matrix, even the strategy 9 doesn't
|
||||
# split anything.
|
||||
([(10, 10)], 9, 1, 304)
|
||||
],
|
||||
)
|
||||
def test_optimization_mode(
|
||||
caplog,
|
||||
blocks: List[Tuple[int, int]],
|
||||
mode: Optional[int],
|
||||
expected_subtables: int,
|
||||
expected_bytes: int,
|
||||
):
|
||||
"""Check that the optimizations are off by default, and that increasing
|
||||
the optimization level creates more subtables and a smaller byte size.
|
||||
"""
|
||||
caplog.set_level(logging.DEBUG)
|
||||
|
||||
glyphs, features = get_kerning_by_blocks(blocks)
|
||||
glyphs = [".notdef space"] + glyphs
|
||||
|
||||
env = {}
|
||||
if mode is not None:
|
||||
# NOTE: activating this optimization via the environment variable is
|
||||
# experimental and may not be supported once an alternative mechanism
|
||||
# is in place. See: https://github.com/fonttools/fonttools/issues/2349
|
||||
env["FONTTOOLS_GPOS_COMPACT_MODE"] = str(mode)
|
||||
with set_env(**env):
|
||||
fb = FontBuilder(1000)
|
||||
fb.setupGlyphOrder(glyphs)
|
||||
addOpenTypeFeaturesFromString(fb.font, features)
|
||||
assert expected_subtables == count_pairpos_subtables(fb.font)
|
||||
assert expected_bytes == count_pairpos_bytes(fb.font)
|
Loading…
x
Reference in New Issue
Block a user