diff --git a/Lib/fontTools/misc/intTools.py b/Lib/fontTools/misc/intTools.py index d31a75b1b..6ba03e163 100644 --- a/Lib/fontTools/misc/intTools.py +++ b/Lib/fontTools/misc/intTools.py @@ -1,14 +1,25 @@ -__all__ = ['popCount'] +__all__ = ["popCount"] try: bit_count = int.bit_count except AttributeError: + def bit_count(v): - return bin(v).count('1') + return bin(v).count("1") + """Return number of 1 bits (population count) of the absolute value of an integer. See https://docs.python.org/3.10/library/stdtypes.html#int.bit_count """ popCount = bit_count + + +def bit_indices(v): + """Return list of indices where bits are set, 0 being the index of the least significant bit. + + >>> bit_indices(0b101) + [0, 2] + """ + return [i for i, b in enumerate(bin(v)[::-1]) if b == "1"] diff --git a/Lib/fontTools/otlLib/builder.py b/Lib/fontTools/otlLib/builder.py index 182f7da6a..bfb9d41f8 100644 --- a/Lib/fontTools/otlLib/builder.py +++ b/Lib/fontTools/otlLib/builder.py @@ -1,4 +1,5 @@ from collections import namedtuple, OrderedDict +import os from fontTools.misc.fixedTools import fixedToFloat from fontTools import ttLib from fontTools.ttLib.tables import otTables as ot @@ -10,6 +11,7 @@ from fontTools.ttLib.tables.otBase import ( ) from fontTools.ttLib.tables import otBase from fontTools.feaLib.ast import STATNameStatement +from fontTools.otlLib.optimize.gpos import GPOS_COMPACT_MODE_DEFAULT, GPOS_COMPACT_MODE_ENV_KEY, compact_lookup from fontTools.otlLib.error import OpenTypeLibError from functools import reduce import logging @@ -1373,7 +1375,17 @@ class PairPosBuilder(LookupBuilder): subtables.extend(buildPairPosGlyphs(self.glyphPairs, self.glyphMap)) for key in sorted(builders.keys()): subtables.extend(builders[key].subtables()) - return self.buildLookup_(subtables) + lookup = self.buildLookup_(subtables) + + # Compact the lookup + # This is a good moment to do it because the compaction should create + # smaller subtables, which may prevent overflows from happening. + mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT) + if mode and mode != "0": + log.info("Compacting GPOS...") + compact_lookup(self.font, mode, lookup) + + return lookup class SinglePosBuilder(LookupBuilder): diff --git a/Lib/fontTools/otlLib/optimize/__init__.py b/Lib/fontTools/otlLib/optimize/__init__.py new file mode 100644 index 000000000..5c007e891 --- /dev/null +++ b/Lib/fontTools/otlLib/optimize/__init__.py @@ -0,0 +1,68 @@ +from argparse import RawTextHelpFormatter +from textwrap import dedent + +from fontTools.ttLib import TTFont +from fontTools.otlLib.optimize.gpos import compact, GPOS_COMPACT_MODE_DEFAULT + +def main(args=None): + """Optimize the layout tables of an existing font.""" + from argparse import ArgumentParser + from fontTools import configLogger + + parser = ArgumentParser(prog="otlLib.optimize", description=main.__doc__, formatter_class=RawTextHelpFormatter) + parser.add_argument("font") + parser.add_argument( + "-o", metavar="OUTPUTFILE", dest="outfile", default=None, help="output file" + ) + parser.add_argument( + "--gpos-compact-mode", + help=dedent( + f"""\ + GPOS Lookup type 2 (PairPos) compaction mode: + 0 = do not attempt to compact PairPos lookups; + 1 to 8 = create at most 1 to 8 new subtables for each existing + subtable, provided that it would yield a 50%% file size saving; + 9 = create as many new subtables as needed to yield a file size saving. + Default: {GPOS_COMPACT_MODE_DEFAULT}. + + This compaction aims to save file size, by splitting large class + kerning subtables (Format 2) that contain many zero values into + smaller and denser subtables. It's a trade-off between the overhead + of several subtables versus the sparseness of one big subtable. + + See the pull request: https://github.com/fonttools/fonttools/pull/2326 + """ + ), + default=int(GPOS_COMPACT_MODE_DEFAULT), + choices=list(range(10)), + type=int, + ) + logging_group = parser.add_mutually_exclusive_group(required=False) + logging_group.add_argument( + "-v", "--verbose", action="store_true", help="Run more verbosely." + ) + logging_group.add_argument( + "-q", "--quiet", action="store_true", help="Turn verbosity off." + ) + options = parser.parse_args(args) + + configLogger( + level=("DEBUG" if options.verbose else "ERROR" if options.quiet else "INFO") + ) + + font = TTFont(options.font) + # TODO: switch everything to have type(mode) = int when using the Config class + compact(font, str(options.gpos_compact_mode)) + font.save(options.outfile or options.font) + + + +if __name__ == "__main__": + import sys + + if len(sys.argv) > 1: + sys.exit(main()) + import doctest + + sys.exit(doctest.testmod().failed) + diff --git a/Lib/fontTools/otlLib/optimize/__main__.py b/Lib/fontTools/otlLib/optimize/__main__.py new file mode 100644 index 000000000..03027ecd9 --- /dev/null +++ b/Lib/fontTools/otlLib/optimize/__main__.py @@ -0,0 +1,6 @@ +import sys +from fontTools.otlLib.optimize import main + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py new file mode 100644 index 000000000..79873fadb --- /dev/null +++ b/Lib/fontTools/otlLib/optimize/gpos.py @@ -0,0 +1,439 @@ +import logging +from collections import defaultdict, namedtuple +from functools import reduce +from itertools import chain +from math import log2 +from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple + +from fontTools.misc.intTools import bit_count, bit_indices +from fontTools.ttLib import TTFont +from fontTools.ttLib.tables import otBase, otTables + +# NOTE: activating this optimization via the environment variable is +# experimental and may not be supported once an alternative mechanism +# is in place. See: https://github.com/fonttools/fonttools/issues/2349 +GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE" +GPOS_COMPACT_MODE_DEFAULT = "0" + +log = logging.getLogger("fontTools.otlLib.optimize.gpos") + + +def compact(font: TTFont, mode: str) -> TTFont: + # Ideal plan: + # 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable + # https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable + # 2. Extract glyph-glyph kerning and class-kerning from all present subtables + # 3. Regroup into different subtable arrangements + # 4. Put back into the lookup + # + # Actual implementation: + # 2. Only class kerning is optimized currently + # 3. If the input kerning is already in several subtables, the subtables + # are not grouped together first; instead each subtable is treated + # independently, so currently this step is: + # Split existing subtables into more smaller subtables + gpos = font["GPOS"] + for lookup in gpos.table.LookupList.Lookup: + if lookup.LookupType == 2: + compact_lookup(font, mode, lookup) + elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2: + compact_ext_lookup(font, mode, lookup) + return font + + +def compact_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None: + new_subtables = compact_pair_pos(font, mode, lookup.SubTable) + lookup.SubTable = new_subtables + lookup.SubTableCount = len(new_subtables) + + +def compact_ext_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None: + new_subtables = compact_pair_pos( + font, mode, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable] + ) + new_ext_subtables = [] + for subtable in new_subtables: + ext_subtable = otTables.ExtensionPos() + ext_subtable.Format = 1 + ext_subtable.ExtSubTable = subtable + new_ext_subtables.append(ext_subtable) + lookup.SubTable = new_ext_subtables + lookup.SubTableCount = len(new_ext_subtables) + + +def compact_pair_pos( + font: TTFont, mode: str, subtables: Sequence[otTables.PairPos] +) -> Sequence[otTables.PairPos]: + new_subtables = [] + for subtable in subtables: + if subtable.Format == 1: + # Not doing anything to Format 1 (yet?) + new_subtables.append(subtable) + elif subtable.Format == 2: + new_subtables.extend(compact_class_pairs(font, mode, subtable)) + return new_subtables + + +def compact_class_pairs( + font: TTFont, mode: str, subtable: otTables.PairPos +) -> List[otTables.PairPos]: + from fontTools.otlLib.builder import buildPairPosClassesSubtable + + subtables = [] + classes1: DefaultDict[int, List[str]] = defaultdict(list) + for g in subtable.Coverage.glyphs: + classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g) + classes2: DefaultDict[int, List[str]] = defaultdict(list) + for g, i in subtable.ClassDef2.classDefs.items(): + classes2[i].append(g) + all_pairs = {} + for i, class1 in enumerate(subtable.Class1Record): + for j, class2 in enumerate(class1.Class2Record): + if is_really_zero(class2): + continue + all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = ( + getattr(class2, "Value1", None), + getattr(class2, "Value2", None), + ) + + if len(mode) == 1 and mode in "123456789": + grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost( + font, all_pairs, int(mode) + ) + for pairs in grouped_pairs: + subtables.append( + buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()) + ) + else: + raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={mode}") + return subtables + + +def is_really_zero(class2: otTables.Class2Record) -> bool: + v1 = getattr(class2, "Value1", None) + v2 = getattr(class2, "Value2", None) + return (v1 is None or v1.getEffectiveFormat() == 0) and ( + v2 is None or v2.getEffectiveFormat() == 0 + ) + + +Pairs = Dict[ + Tuple[Tuple[str, ...], Tuple[str, ...]], + Tuple[otBase.ValueRecord, otBase.ValueRecord], +] + +# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958 +def _getClassRanges(glyphIDs: Iterable[int]): + glyphIDs = sorted(glyphIDs) + last = glyphIDs[0] + ranges = [[last]] + for glyphID in glyphIDs[1:]: + if glyphID != last + 1: + ranges[-1].append(last) + ranges.append([glyphID]) + last = glyphID + ranges[-1].append(last) + return ranges, glyphIDs[0], glyphIDs[-1] + + +# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989 +def _classDef_bytes( + class_data: List[Tuple[List[Tuple[int, int]], int, int]], + class_ids: List[int], + coverage=False, +): + if not class_ids: + return 0 + first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]] + range_count = len(first_ranges) + for i in class_ids[1:]: + data = class_data[i] + range_count += len(data[0]) + min_glyph_id = min(min_glyph_id, data[1]) + max_glyph_id = max(max_glyph_id, data[2]) + glyphCount = max_glyph_id - min_glyph_id + 1 + # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1 + format1_bytes = 6 + glyphCount * 2 + # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2 + format2_bytes = 4 + range_count * 6 + return min(format1_bytes, format2_bytes) + + +ClusteringContext = namedtuple( + "ClusteringContext", + [ + "lines", + "all_class1", + "all_class1_data", + "all_class2_data", + "valueFormat1_bytes", + "valueFormat2_bytes", + ], +) + + +class Cluster: + # TODO(Python 3.7): Turn this into a dataclass + # ctx: ClusteringContext + # indices: int + # Caches + # TODO(Python 3.8): use functools.cached_property instead of the + # manually cached properties, and remove the cache fields listed below. + # _indices: Optional[List[int]] = None + # _column_indices: Optional[List[int]] = None + # _cost: Optional[int] = None + + __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost" + + def __init__(self, ctx: ClusteringContext, indices_bitmask: int): + self.ctx = ctx + self.indices_bitmask = indices_bitmask + self._indices = None + self._column_indices = None + self._cost = None + + @property + def indices(self): + if self._indices is None: + self._indices = bit_indices(self.indices_bitmask) + return self._indices + + @property + def column_indices(self): + if self._column_indices is None: + # Indices of columns that have a 1 in at least 1 line + # => binary OR all the lines + bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices)) + self._column_indices = bit_indices(bitmask) + return self._column_indices + + @property + def width(self): + # Add 1 because Class2=0 cannot be used but needs to be encoded. + return len(self.column_indices) + 1 + + @property + def cost(self): + if self._cost is None: + self._cost = ( + # 2 bytes to store the offset to this subtable in the Lookup table above + 2 + # Contents of the subtable + # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment + # uint16 posFormat Format identifier: format = 2 + + 2 + # Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable. + + 2 + + self.coverage_bytes + # uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero). + + 2 + # uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero). + + 2 + # Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair. + + 2 + + self.classDef1_bytes + # Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair. + + 2 + + self.classDef2_bytes + # uint16 class1Count Number of classes in classDef1 table — includes Class 0. + + 2 + # uint16 class2Count Number of classes in classDef2 table — includes Class 0. + + 2 + # Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1. + + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes) + * len(self.indices) + * self.width + ) + return self._cost + + @property + def coverage_bytes(self): + format1_bytes = ( + # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1 + # uint16 coverageFormat Format identifier — format = 1 + # uint16 glyphCount Number of glyphs in the glyph array + 4 + # uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order + + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2 + ) + ranges = sorted( + chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices) + ) + merged_range_count = 0 + last = None + for (start, end) in ranges: + if last is not None and start != last + 1: + merged_range_count += 1 + last = end + format2_bytes = ( + # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2 + # uint16 coverageFormat Format identifier — format = 2 + # uint16 rangeCount Number of RangeRecords + 4 + # RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID. + # uint16 startGlyphID First glyph ID in the range + # uint16 endGlyphID Last glyph ID in the range + # uint16 startCoverageIndex Coverage Index of first glyph ID in range + + merged_range_count * 6 + ) + return min(format1_bytes, format2_bytes) + + @property + def classDef1_bytes(self): + # We can skip encoding one of the Class1 definitions, and use + # Class1=0 to represent it instead, because Class1 is gated by the + # Coverage definition. Use Class1=0 for the highest byte savings. + # Going through all options takes too long, pick the biggest class + # = what happens in otlLib.builder.ClassDefBuilder.classes() + biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i])) + return _classDef_bytes( + self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index] + ) + + @property + def classDef2_bytes(self): + # All Class2 need to be encoded because we can't use Class2=0 + return _classDef_bytes(self.ctx.all_class2_data, self.column_indices) + + +def cluster_pairs_by_class2_coverage_custom_cost( + font: TTFont, + pairs: Pairs, + compression: int = 5, +) -> List[Pairs]: + if not pairs: + # The subtable was actually empty? + return [pairs] + + # Sorted for reproducibility/determinism + all_class1 = sorted(set(pair[0] for pair in pairs)) + all_class2 = sorted(set(pair[1] for pair in pairs)) + + # Use Python's big ints for binary vectors representing each line + lines = [ + sum( + 1 << i if (class1, class2) in pairs else 0 + for i, class2 in enumerate(all_class2) + ) + for class1 in all_class1 + ] + + # Map glyph names to ids and work with ints throughout for ClassDef formats + name_to_id = font.getReverseGlyphMap() + # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id) + all_class1_data = [ + _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1 + ] + all_class2_data = [ + _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2 + ] + + format1 = 0 + format2 = 0 + for pair, value in pairs.items(): + format1 |= value[0].getEffectiveFormat() if value[0] else 0 + format2 |= value[1].getEffectiveFormat() if value[1] else 0 + valueFormat1_bytes = bit_count(format1) * 2 + valueFormat2_bytes = bit_count(format2) * 2 + + ctx = ClusteringContext( + lines, + all_class1, + all_class1_data, + all_class2_data, + valueFormat1_bytes, + valueFormat2_bytes, + ) + + cluster_cache: Dict[int, Cluster] = {} + + def make_cluster(indices: int) -> Cluster: + cluster = cluster_cache.get(indices, None) + if cluster is not None: + return cluster + cluster = Cluster(ctx, indices) + cluster_cache[indices] = cluster + return cluster + + def merge(cluster: Cluster, other: Cluster) -> Cluster: + return make_cluster(cluster.indices_bitmask | other.indices_bitmask) + + # Agglomerative clustering by hand, checking the cost gain of the new + # cluster against the previously separate clusters + # Start with 1 cluster per line + # cluster = set of lines = new subtable + clusters = [make_cluster(1 << i) for i in range(len(lines))] + + # Cost of 1 cluster with everything + # `(1 << len) - 1` gives a bitmask full of 1's of length `len` + cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost + log.debug(f" len(clusters) = {len(clusters)}") + + while len(clusters) > 1: + lowest_cost_change = None + best_cluster_index = None + best_other_index = None + best_merged = None + for i, cluster in enumerate(clusters): + for j, other in enumerate(clusters[i + 1 :]): + merged = merge(cluster, other) + cost_change = merged.cost - cluster.cost - other.cost + if lowest_cost_change is None or cost_change < lowest_cost_change: + lowest_cost_change = cost_change + best_cluster_index = i + best_other_index = i + 1 + j + best_merged = merged + assert lowest_cost_change is not None + assert best_cluster_index is not None + assert best_other_index is not None + assert best_merged is not None + + # If the best merge we found is still taking down the file size, then + # there's no question: we must do it, because it's beneficial in both + # ways (lower file size and lower number of subtables). However, if the + # best merge we found is not reducing file size anymore, then we need to + # look at the other stop criteria = the compression factor. + if lowest_cost_change > 0: + # Stop critera: check whether we should keep merging. + # Compute size reduction brought by splitting + cost_after_splitting = sum(c.cost for c in clusters) + # size_reduction so that after = before * (1 - size_reduction) + # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2 + size_reduction = 1 - cost_after_splitting / cost_before_splitting + + # Force more merging by taking into account the compression number. + # Target behaviour: compression number = 1 to 9, default 5 like gzip + # - 1 = accept to add 1 subtable to reduce size by 50% + # - 5 = accept to add 5 subtables to reduce size by 50% + # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691 + # Given the size reduction we have achieved so far, compute how many + # new subtables are acceptable. + max_new_subtables = -log2(1 - size_reduction) * compression + log.debug( + f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}", + ) + if compression == 9: + # Override level 9 to mean: create any number of subtables + max_new_subtables = len(clusters) + + # If we have managed to take the number of new subtables below the + # threshold, then we can stop. + if len(clusters) <= max_new_subtables + 1: + break + + # No reason to stop yet, do the merge and move on to the next. + del clusters[best_other_index] + clusters[best_cluster_index] = best_merged + + # All clusters are final; turn bitmasks back into the "Pairs" format + pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict) + for pair, values in pairs.items(): + pairs_by_class1[pair[0]][pair] = values + pairs_groups: List[Pairs] = [] + for cluster in clusters: + pairs_group: Pairs = dict() + for i in cluster.indices: + class1 = all_class1[i] + pairs_group.update(pairs_by_class1[class1]) + pairs_groups.append(pairs_group) + return pairs_groups diff --git a/Lib/fontTools/varLib/merger.py b/Lib/fontTools/varLib/merger.py index 888b52c26..aaf2a5130 100644 --- a/Lib/fontTools/varLib/merger.py +++ b/Lib/fontTools/varLib/merger.py @@ -1,8 +1,10 @@ """ Merge OpenType Layout tables (GDEF / GPOS / GSUB). """ +import os import copy from operator import ior +import logging from fontTools.misc import classifyTools from fontTools.misc.roundTools import otRound from fontTools.ttLib.tables import otTables as ot @@ -13,6 +15,13 @@ from fontTools.varLib.models import nonNone, allNone, allEqual, allEqualTo from fontTools.varLib.varStore import VarStoreInstancer from functools import reduce from fontTools.otlLib.builder import buildSinglePos +from fontTools.otlLib.optimize.gpos import ( + compact_pair_pos, + GPOS_COMPACT_MODE_DEFAULT, + GPOS_COMPACT_MODE_ENV_KEY, +) + +log = logging.getLogger("fontTools.varLib.merger") from .errors import ( ShouldBeConstant, @@ -837,6 +846,15 @@ def merge(merger, self, lst): self.SubTable.pop(-1) self.SubTableCount -= 1 + # Compact the merged subtables + # This is a good moment to do it because the compaction should create + # smaller subtables, which may prevent overflows from happening. + mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT) + if mode and mode != "0": + log.info("Compacting GPOS...") + self.SubTable = compact_pair_pos(merger.font, mode, self.SubTable) + self.SubTableCount = len(self.SubTable) + elif isSinglePos and flattened: singlePosTable = self.SubTable[0] glyphs = singlePosTable.Coverage.glyphs @@ -851,7 +869,6 @@ def merge(merger, self, lst): del merger.lookup_subtables - # # InstancerMerger # diff --git a/NEWS.rst b/NEWS.rst index 16dca63ee..9de1c6c36 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -15,6 +15,11 @@ - [post] Fixed parsing ``post`` table format 2.0 when it contains extra garbage at the end of the stringData array (#2314). - [subset] drop empty features unless 'size' with FeatureParams table (#2324). +- [otlLib] Added ``otlLib.optimize`` module; added GPOS compaction algorithm. + The compaction can be run on existing fonts with ``fonttools otlLib.optimize`` + or using the snippet ``compact_gpos.py``. There's experimental support for + compacting fonts at compilation time using an environment variable, but that + might be removed later (#2326). 4.24.4 (released 2021-05-25) ---------------------------- @@ -498,7 +503,7 @@ instance, correctly map the value forward. - [varLib] The avar table can now contain mapping output values that are greater than OR EQUAL to the preceeding value, as the avar specification allows this. -- [varLib] The errors of the module are now ordered hierarchically below VarLibError. +- [varLib] The errors of the module are now ordered hierarchically below VarLibError. See #1821. 4.3.0 (released 2020-02-03) @@ -792,13 +797,13 @@ - [mutator] Set ``OVERLAP_SIMPLE`` and ``OVERLAP_COMPOUND`` glyf flags by default in ``instantiateVariableFont``. Added ``--no-overlap`` cli option to disable this (#1518). -- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517). +- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517). Fixed subsetting an ``HVAR`` table that has an ``AdvanceWidthMap`` when the option ``--retain-gids`` is used. -- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511). - Fixed double indentation of ``subtable`` statement (#1512). +- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511). + Fixed double indentation of ``subtable`` statement (#1512). Added support for ``subtable`` statement in more places than just PairPos - lookups (#1520). + lookups (#1520). Handle lookupflag 0 and lookupflag without a value (#1540). - [varLib] In ``load_designspace``, provide a default English name for the ``ital`` axis tag. diff --git a/Snippets/compact_gpos.py b/Snippets/compact_gpos.py new file mode 100644 index 000000000..a5bd2f8b4 --- /dev/null +++ b/Snippets/compact_gpos.py @@ -0,0 +1,144 @@ +#! /usr/bin/env python3 + +""" +Sample script to use the otlLib.optimize.gpos functions to compact GPOS tables +of existing fonts. This script takes one or more TTF files as arguments and +will create compacted copies of the fonts using all available modes of the GPOS +compaction algorithm. For each copy, it will measure the new size of the GPOS +table and also the new size of the font in WOFF2 format. All results will be +printed to stdout in CSV format, so the savings provided by the algorithm in +each mode can be inspected. + +This was initially made to debug the algorithm but can also be used to choose +a mode value for a specific font (trade-off between bytes saved in TTF format +vs more bytes in WOFF2 format and more subtables). + +Run: + +python Snippets/compact_gpos.py MyFont.ttf > results.csv +""" + +import argparse +from collections import defaultdict +import csv +import time +import sys +from pathlib import Path +from typing import Any, Iterable, List, Optional, Sequence, Tuple + +from fontTools.ttLib import TTFont +from fontTools.otlLib.optimize import compact + +MODES = [str(c) for c in range(1, 10)] + + +def main(args: Optional[List[str]] = None): + parser = argparse.ArgumentParser() + parser.add_argument("fonts", type=Path, nargs="+", help="Path to TTFs.") + parsed_args = parser.parse_args(args) + + runtimes = defaultdict(list) + rows = [] + font_path: Path + for font_path in parsed_args.fonts: + font = TTFont(font_path) + if "GPOS" not in font: + print(f"No GPOS in {font_path.name}, skipping.", file=sys.stderr) + continue + size_orig = len(font.getTableData("GPOS")) / 1024 + print(f"Measuring {font_path.name}...", file=sys.stderr) + + fonts = {} + font_paths = {} + sizes = {} + for mode in MODES: + print(f" Running mode={mode}", file=sys.stderr) + fonts[mode] = TTFont(font_path) + before = time.perf_counter() + compact(fonts[mode], mode=str(mode)) + runtimes[mode].append(time.perf_counter() - before) + font_paths[mode] = ( + font_path.parent + / "compact" + / (font_path.stem + f"_{mode}" + font_path.suffix) + ) + font_paths[mode].parent.mkdir(parents=True, exist_ok=True) + fonts[mode].save(font_paths[mode]) + fonts[mode] = TTFont(font_paths[mode]) + sizes[mode] = len(fonts[mode].getTableData("GPOS")) / 1024 + + print(f" Runtimes:", file=sys.stderr) + for mode, times in runtimes.items(): + print( + f" {mode:10} {' '.join(f'{t:5.2f}' for t in times)}", + file=sys.stderr, + ) + + # Bonus: measure WOFF2 file sizes. + print(f" Measuring WOFF2 sizes", file=sys.stderr) + size_woff_orig = woff_size(font, font_path) / 1024 + sizes_woff = { + mode: woff_size(fonts[mode], font_paths[mode]) / 1024 for mode in MODES + } + + rows.append( + ( + font_path.name, + size_orig, + size_woff_orig, + *flatten( + ( + sizes[mode], + pct(sizes[mode], size_orig), + sizes_woff[mode], + pct(sizes_woff[mode], size_woff_orig), + ) + for mode in MODES + ), + ) + ) + + write_csv(rows) + + +def woff_size(font: TTFont, path: Path) -> int: + font.flavor = "woff2" + woff_path = path.with_suffix(".woff2") + font.save(woff_path) + return woff_path.stat().st_size + + +def write_csv(rows: List[Tuple[Any]]) -> None: + sys.stdout.reconfigure(encoding="utf-8") + sys.stdout.write("\uFEFF") + writer = csv.writer(sys.stdout, lineterminator="\n") + writer.writerow( + [ + "File", + "Original GPOS Size", + "Original WOFF2 Size", + *flatten( + ( + f"mode={mode}", + f"Change {mode}", + f"mode={mode} WOFF2 Size", + f"Change {mode} WOFF2 Size", + ) + for mode in MODES + ), + ] + ) + for row in rows: + writer.writerow(row) + + +def pct(new: float, old: float) -> float: + return -(1 - (new / old)) + + +def flatten(seq_seq: Iterable[Iterable[Any]]) -> List[Any]: + return [thing for seq in seq_seq for thing in seq] + + +if __name__ == "__main__": + main() diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py new file mode 100644 index 000000000..40cf389e3 --- /dev/null +++ b/Tests/otlLib/optimize_test.py @@ -0,0 +1,175 @@ +import logging +from pathlib import Path +from subprocess import run +import contextlib +import os +from typing import List, Optional, Tuple +from fontTools.ttLib import TTFont + +import pytest + +from fontTools.feaLib.builder import addOpenTypeFeaturesFromString +from fontTools.fontBuilder import FontBuilder + +from fontTools.ttLib.tables.otBase import OTTableWriter, ValueRecord + + +def test_main(tmpdir: Path): + """Check that calling the main function on an input TTF works.""" + glyphs = ".notdef space A Aacute B D".split() + features = """ + @A = [A Aacute]; + @B = [B D]; + feature kern { + pos @A @B -50; + } kern; + """ + fb = FontBuilder(1000) + fb.setupGlyphOrder(glyphs) + addOpenTypeFeaturesFromString(fb.font, features) + input = tmpdir / "in.ttf" + fb.save(str(input)) + output = tmpdir / "out.ttf" + run( + [ + "fonttools", + "otlLib.optimize", + "--gpos-compact-mode", + "5", + str(input), + "-o", + str(output), + ], + check=True, + ) + assert output.exists() + + +# Copy-pasted from https://stackoverflow.com/questions/2059482/python-temporarily-modify-the-current-processs-environment +# TODO: remove when moving to the Config class +@contextlib.contextmanager +def set_env(**environ): + """ + Temporarily set the process environment variables. + + >>> with set_env(PLUGINS_DIR=u'test/plugins'): + ... "PLUGINS_DIR" in os.environ + True + + >>> "PLUGINS_DIR" in os.environ + False + + :type environ: dict[str, unicode] + :param environ: Environment variables to set + """ + old_environ = dict(os.environ) + os.environ.update(environ) + try: + yield + finally: + os.environ.clear() + os.environ.update(old_environ) + + +def count_pairpos_subtables(font: TTFont) -> int: + subtables = 0 + for lookup in font["GPOS"].table.LookupList.Lookup: + if lookup.LookupType == 2: + subtables += len(lookup.SubTable) + elif lookup.LookupType == 9: + for subtable in lookup.SubTable: + if subtable.ExtensionLookupType == 2: + subtables += 1 + return subtables + + +def count_pairpos_bytes(font: TTFont) -> int: + bytes = 0 + gpos = font["GPOS"] + for lookup in font["GPOS"].table.LookupList.Lookup: + if lookup.LookupType == 2: + w = OTTableWriter(tableTag=gpos.tableTag) + lookup.compile(w, font) + bytes += len(w.getAllData()) + elif lookup.LookupType == 9: + if any(subtable.ExtensionLookupType == 2 for subtable in lookup.SubTable): + w = OTTableWriter(tableTag=gpos.tableTag) + lookup.compile(w, font) + bytes += len(w.getAllData()) + return bytes + + +def get_kerning_by_blocks(blocks: List[Tuple[int, int]]) -> Tuple[List[str], str]: + """Generate a highly compressible font by generating a bunch of rectangular + blocks on the diagonal that can easily be sliced into subtables. + + Returns the list of glyphs and feature code of the font. + """ + value = 0 + glyphs: List[str] = [] + rules = [] + # Each block is like a script in a multi-script font + for script, (width, height) in enumerate(blocks): + glyphs.extend(f"g_{script}_{i}" for i in range(max(width, height))) + for l in range(height): + for r in range(width): + value += 1 + rules.append((f"g_{script}_{l}", f"g_{script}_{r}", value)) + classes = "\n".join([f"@{g} = [{g}];" for g in glyphs]) + statements = "\n".join([f"pos @{l} @{r} {v};" for (l, r, v) in rules]) + features = f""" + {classes} + feature kern {{ + {statements} + }} kern; + """ + return glyphs, features + + +@pytest.mark.parametrize( + ("blocks", "mode", "expected_subtables", "expected_bytes"), + [ + # Mode = 0 = no optimization leads to 650 bytes of GPOS + ([(15, 3), (2, 10)], None, 1, 602), + # Optimization level 1 recognizes the 2 blocks and splits into 2 + # subtables = adds 1 subtable leading to a size reduction of + # (602-298)/602 = 50% + ([(15, 3), (2, 10)], 1, 2, 298), + # On a bigger block configuration, we see that mode=5 doesn't create + # as many subtables as it could, because of the stop criteria + ([(4, 4) for _ in range(20)], 5, 14, 2042), + # while level=9 creates as many subtables as there were blocks on the + # diagonal and yields a better saving + ([(4, 4) for _ in range(20)], 9, 20, 1886), + # On a fully occupied kerning matrix, even the strategy 9 doesn't + # split anything. + ([(10, 10)], 9, 1, 304) + ], +) +def test_optimization_mode( + caplog, + blocks: List[Tuple[int, int]], + mode: Optional[int], + expected_subtables: int, + expected_bytes: int, +): + """Check that the optimizations are off by default, and that increasing + the optimization level creates more subtables and a smaller byte size. + """ + caplog.set_level(logging.DEBUG) + + glyphs, features = get_kerning_by_blocks(blocks) + glyphs = [".notdef space"] + glyphs + + env = {} + if mode is not None: + # NOTE: activating this optimization via the environment variable is + # experimental and may not be supported once an alternative mechanism + # is in place. See: https://github.com/fonttools/fonttools/issues/2349 + env["FONTTOOLS_GPOS_COMPACT_MODE"] = str(mode) + with set_env(**env): + fb = FontBuilder(1000) + fb.setupGlyphOrder(glyphs) + addOpenTypeFeaturesFromString(fb.font, features) + assert expected_subtables == count_pairpos_subtables(fb.font) + assert expected_bytes == count_pairpos_bytes(fb.font)