From bba5e8b2133d430c63c09e95eab23a1b534f08ae Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Fri, 28 May 2021 16:46:20 +0100
Subject: [PATCH 01/10] Add optional compaction of GPOS PairPos subtables in
 otlLib.optimize.gpos

See pull request: https://github.com/fonttools/fonttools/pull/2326

The new module `otlLib.optimize.gpos` provides `compact` functions that
can reduce the file size of GPOS PairPos lookups by splitting subtables
in a smart way to avoid storing zero-valued pairs.

The compaction methods are called from `otlLib.builder` and
`varLib.merger` so that static and variable fonts can benefit from the
optimization at compile time.

The new module `otlLib.optimize` is also executable, to allow running
the optimization on existing fonts.

The optimization is a trade-off: on the one hand it can significantly
reduce the byte size of the GPOS table (up to 50% in randomly picked
Google Fonts), but on the other hand it adds to the compilation time and
may make fonts very slightly bigger once compressed to WOFF2 (because
WOFF2 is not penalized by zero values and compresses them very well).

As such, the optimization is off by default. You can activate it by
setting the `FONTTOOLS_GPOS_COMPACT_MODE` environment variable, for
example `FONTTOOLS_GPOS_COMPACT_MODE=5` (values range from 0 = off to
9 = maximum file size savings, at the cost of many more subtables).
---
 Lib/fontTools/misc/intTools.py            |  15 +-
 Lib/fontTools/otlLib/builder.py           |  12 +-
 Lib/fontTools/otlLib/optimize/__init__.py |  66 ++++
 Lib/fontTools/otlLib/optimize/__main__.py |   6 +
 Lib/fontTools/otlLib/optimize/gpos.py     | 394 ++++++++++++++++++++++
 Lib/fontTools/varLib/merger.py            |  17 +-
 Snippets/compact_gpos.py                  | 124 +++++++
 Tests/otlLib/optimize_test.py             |  46 +++
 8 files changed, 676 insertions(+), 4 deletions(-)
 create mode 100644 Lib/fontTools/otlLib/optimize/__init__.py
 create mode 100644 Lib/fontTools/otlLib/optimize/__main__.py
 create mode 100644 Lib/fontTools/otlLib/optimize/gpos.py
 create mode 100644 Snippets/compact_gpos.py
 create mode 100644 Tests/otlLib/optimize_test.py

diff --git a/Lib/fontTools/misc/intTools.py b/Lib/fontTools/misc/intTools.py
index d31a75b1b..6ba03e163 100644
--- a/Lib/fontTools/misc/intTools.py
+++ b/Lib/fontTools/misc/intTools.py
@@ -1,14 +1,25 @@
-__all__ = ['popCount']
+__all__ = ["popCount"]
 
 
 try:
     bit_count = int.bit_count
 except AttributeError:
+
     def bit_count(v):
-        return bin(v).count('1')
+        return bin(v).count("1")
+
 
 """Return number of 1 bits (population count) of the absolute value of an integer.
 
 See https://docs.python.org/3.10/library/stdtypes.html#int.bit_count
 """
 popCount = bit_count
+
+
+def bit_indices(v):
+    """Return list of indices where bits are set, 0 being the index of the least significant bit.
+
+    >>> bit_indices(0b101)
+    [0, 2]
+    """
+    return [i for i, b in enumerate(bin(v)[::-1]) if b == "1"]
diff --git a/Lib/fontTools/otlLib/builder.py b/Lib/fontTools/otlLib/builder.py
index 182f7da6a..b58a5f880 100644
--- a/Lib/fontTools/otlLib/builder.py
+++ b/Lib/fontTools/otlLib/builder.py
@@ -1,4 +1,5 @@
 from collections import namedtuple, OrderedDict
+import os
 from fontTools.misc.fixedTools import fixedToFloat
 from fontTools import ttLib
 from fontTools.ttLib.tables import otTables as ot
@@ -10,6 +11,7 @@ from fontTools.ttLib.tables.otBase import (
 )
 from fontTools.ttLib.tables import otBase
 from fontTools.feaLib.ast import STATNameStatement
+from fontTools.otlLib.optimize.gpos import GPOS_COMPACT_MODE_DEFAULT, GPOS_COMPACT_MODE_ENV_KEY, compact_lookup
 from fontTools.otlLib.error import OpenTypeLibError
 from functools import reduce
 import logging
@@ -1373,7 +1375,15 @@ class PairPosBuilder(LookupBuilder):
             subtables.extend(buildPairPosGlyphs(self.glyphPairs, self.glyphMap))
         for key in sorted(builders.keys()):
             subtables.extend(builders[key].subtables())
-        return self.buildLookup_(subtables)
+        lookup = self.buildLookup_(subtables)
+
+        # Compact the lookup
+        mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
+        if mode and mode != "0":
+            log.info("Compacting GPOS...")
+            compact_lookup(self.font, mode, lookup)
+
+        return lookup
 
 
 class SinglePosBuilder(LookupBuilder):
diff --git a/Lib/fontTools/otlLib/optimize/__init__.py b/Lib/fontTools/otlLib/optimize/__init__.py
new file mode 100644
index 000000000..22dcd9933
--- /dev/null
+++ b/Lib/fontTools/otlLib/optimize/__init__.py
@@ -0,0 +1,66 @@
+from argparse import RawTextHelpFormatter
+from textwrap import dedent
+
+from fontTools.ttLib import TTFont
+from fontTools.otlLib.optimize.gpos import compact, GPOS_COMPACT_MODE_DEFAULT
+
+def main(args=None):
+    """Optimize the layout tables of an existing font."""
+    from argparse import ArgumentParser
+    from fontTools import configLogger
+
+    parser = ArgumentParser(prog="otlLib.optimize", description=main.__doc__, formatter_class=RawTextHelpFormatter)
+    parser.add_argument("font")
+    parser.add_argument(
+        "-o", metavar="OUTPUTFILE", dest="outfile", default=None, help="output file"
+    )
+    parser.add_argument(
+        "--gpos-compact-mode",
+        help=dedent(
+            f"""\
+            GPOS Lookup type 2 (PairPos) compaction mode:
+                0 = do not attempt to compact PairPos lookups;
+                1 to 8 = create at most 1 to 8 new subtables for each existing
+                    subtable, provided that it would yield a 50%% file size saving;
+                9 = create as many new subtables as needed to yield a file size saving.
+            Default: {GPOS_COMPACT_MODE_DEFAULT}.
+
+            This compaction aims to save file size, by splitting large class
+            kerning subtables (Format 2) that contain many zero values into
+            smaller and denser subtables. It's a trade-off between the overhead
+            of several subtables versus the sparseness of one big subtable.
+
+            See the pull request: https://github.com/fonttools/fonttools/pull/2326
+            """
+        ),
+        default=int(GPOS_COMPACT_MODE_DEFAULT),
+        choices=list(range(10)),
+    )
+    logging_group = parser.add_mutually_exclusive_group(required=False)
+    logging_group.add_argument(
+        "-v", "--verbose", action="store_true", help="Run more verbosely."
+    )
+    logging_group.add_argument(
+        "-q", "--quiet", action="store_true", help="Turn verbosity off."
+    )
+    options = parser.parse_args(args)
+
+    configLogger(
+        level=("DEBUG" if options.verbose else "ERROR" if options.quiet else "INFO")
+    )
+
+    font = TTFont(options.font)
+    compact(font, options.gpos_compact_mode)
+    font.save(options.outfile or options.font)
+
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) > 1:
+        sys.exit(main())
+    import doctest
+
+    sys.exit(doctest.testmod().failed)
+
diff --git a/Lib/fontTools/otlLib/optimize/__main__.py b/Lib/fontTools/otlLib/optimize/__main__.py
new file mode 100644
index 000000000..03027ecd9
--- /dev/null
+++ b/Lib/fontTools/otlLib/optimize/__main__.py
@@ -0,0 +1,6 @@
+import sys
+from fontTools.otlLib.optimize import main
+
+
+if __name__ == '__main__':
+	sys.exit(main())
diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
new file mode 100644
index 000000000..6746233ed
--- /dev/null
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -0,0 +1,394 @@
+import logging
+from collections import defaultdict
+from functools import reduce
+from itertools import chain
+from math import log2
+from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
+
+from fontTools.misc.intTools import bit_count, bit_indices
+from fontTools.ttLib import TTFont
+from fontTools.ttLib.tables import otBase, otTables
+
+GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
+GPOS_COMPACT_MODE_DEFAULT = "0"
+
+log = logging.getLogger("fontTools.otlLib.optimize.gpos")
+
+
+def compact(font: TTFont, mode: str) -> TTFont:
+    # Plan:
+    #  1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
+    #     https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
+    #  2. Extract glyph-glyph kerning and class-kerning from all present subtables
+    #  3. Regroup into different subtable arrangements
+    #  4. Put back into the lookup
+    gpos = font["GPOS"]
+    for lookup in gpos.table.LookupList.Lookup:
+        if lookup.LookupType == 2:
+            compact_lookup(font, mode, lookup)
+        elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
+            compact_ext_lookup(font, mode, lookup)
+    return font
+
+
+def compact_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None:
+    new_subtables = compact_pair_pos(font, mode, lookup.SubTable)
+    lookup.SubTable = new_subtables
+    lookup.SubTableCount = len(new_subtables)
+
+
+def compact_ext_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None:
+    new_subtables = compact_pair_pos(
+        font, mode, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
+    )
+    new_ext_subtables = []
+    for subtable in new_subtables:
+        ext_subtable = otTables.ExtensionPos()
+        ext_subtable.Format = 1
+        ext_subtable.ExtSubTable = subtable
+        new_ext_subtables.append(ext_subtable)
+    lookup.SubTable = new_ext_subtables
+    lookup.SubTableCount = len(new_ext_subtables)
+
+
+def compact_pair_pos(
+    font: TTFont, mode: str, subtables: Sequence[otTables.PairPos]
+) -> Sequence[otTables.PairPos]:
+    new_subtables = []
+    for subtable in subtables:
+        if subtable.Format == 1:
+            # Not doing anything to Format 1 (yet?)
+            new_subtables.append(subtable)
+        elif subtable.Format == 2:
+            new_subtables.extend(compact_class_pairs(font, mode, subtable))
+    return new_subtables
+
+
+def compact_class_pairs(
+    font: TTFont, mode: str, subtable: otTables.PairPos
+) -> List[otTables.PairPos]:
+    from fontTools.otlLib.builder import buildPairPosClassesSubtable
+
+    subtables = []
+    classes1: DefaultDict[int, List[str]] = defaultdict(list)
+    for g in subtable.Coverage.glyphs:
+        classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
+    classes2: DefaultDict[int, List[str]] = defaultdict(list)
+    for g, i in subtable.ClassDef2.classDefs.items():
+        classes2[i].append(g)
+    all_pairs = {}
+    for i, class1 in enumerate(subtable.Class1Record):
+        for j, class2 in enumerate(class1.Class2Record):
+            if is_really_zero(class2):
+                continue
+            all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
+                getattr(class2, "Value1", None),
+                getattr(class2, "Value2", None),
+            )
+
+    if len(mode) == 1 and mode in "123456789":
+        grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(
+            font, all_pairs, int(mode)
+        )
+        for pairs in grouped_pairs:
+            subtables.append(
+                buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap())
+            )
+    else:
+        raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={mode}")
+    return subtables
+
+
+def is_really_zero(class2: otTables.Class2Record) -> bool:
+    v1 = getattr(class2, "Value1", None)
+    v2 = getattr(class2, "Value2", None)
+    return (v1 is None or v1.getEffectiveFormat() == 0) and (
+        v2 is None or v2.getEffectiveFormat() == 0
+    )
+
+
+Pairs = Dict[
+    Tuple[Tuple[str, ...], Tuple[str, ...]],
+    Tuple[otBase.ValueRecord, otBase.ValueRecord],
+]
+
+# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
+def _getClassRanges(glyphIDs: Iterable[int]):
+    glyphIDs = sorted(glyphIDs)
+    last = glyphIDs[0]
+    ranges = [[last]]
+    for glyphID in glyphIDs[1:]:
+        if glyphID != last + 1:
+            ranges[-1].append(last)
+            ranges.append([glyphID])
+        last = glyphID
+    ranges[-1].append(last)
+    return ranges, glyphIDs[0], glyphIDs[-1]
+
+
+# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
+def _classDef_bytes(
+    class_data: List[Tuple[List[Tuple[int, int]], int, int]], class_ids: List[int], coverage=False
+):
+    if not class_ids:
+        return 0
+    first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
+    range_count = len(first_ranges)
+    for i in class_ids[1:]:
+        data = class_data[i]
+        range_count += len(data[0])
+        min_glyph_id = min(min_glyph_id, data[1])
+        max_glyph_id = max(max_glyph_id, data[2])
+    glyphCount = max_glyph_id - min_glyph_id + 1
+    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
+    format1_bytes = 6 + glyphCount * 2
+    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
+    format2_bytes = 4 + range_count * 6
+    return min(format1_bytes, format2_bytes)
+
+
+def cluster_pairs_by_class2_coverage_custom_cost(
+    font: TTFont,
+    pairs: Pairs,
+    compression: int = 5,
+) -> List[Pairs]:
+    # Sorted for reproducibility/determinism
+    all_class1 = sorted(set(pair[0] for pair in pairs))
+    all_class2 = sorted(set(pair[1] for pair in pairs))
+
+    # Use Python's big ints for binary vectors representing each line
+    lines = [
+        sum(
+            1 << i if (class1, class2) in pairs else 0
+            for i, class2 in enumerate(all_class2)
+        )
+        for class1 in all_class1
+    ]
+
+    # Map glyph names to ids and work with ints throughout for ClassDef formats
+    name_to_id = font.getReverseGlyphMap()
+    # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
+    all_class1_data = [
+        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
+    ]
+    all_class2_data = [
+        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
+    ]
+
+    format1 = 0
+    format2 = 0
+    for pair, value in pairs.items():
+        format1 |= value[0].getEffectiveFormat() if value[0] else 0
+        format2 |= value[1].getEffectiveFormat() if value[1] else 0
+    valueFormat1_bytes = bit_count(format1) * 2
+    valueFormat2_bytes = bit_count(format2) * 2
+
+    # Agglomerative clustering by hand, checking the cost gain of the new
+    # cluster against the previously separate clusters
+    # Start with 1 cluster per line
+    # cluster = set of lines = new subtable
+    # The class is here so it has a closure over the data above (lines, etc.)
+    class Cluster:
+        # TODO(Python 3.7): Turn this into a dataclass
+        # indices: int
+        # Caches
+        # TODO(Python 3.8): use functools.cached_property instead of the
+        # manually cached properties, and remove the cache fields listed below.
+        # _indices: Optional[List[int]] = None
+        # _column_indices: Optional[List[int]] = None
+        # _cost: Optional[int] = None
+
+        __slots__ = "indices_bitmask", "_indices", "_column_indices", "_cost"
+
+        def __init__(self, indices_bitmask: int):
+            self.indices_bitmask = indices_bitmask
+            self._indices = None
+            self._column_indices = None
+            self._cost = None
+
+        @property
+        def indices(self):
+            if self._indices is None:
+                self._indices = bit_indices(self.indices_bitmask)
+            return self._indices
+
+        @property
+        def column_indices(self):
+            if self._column_indices is None:
+                # Indices of columns that have a 1 in at least 1 line
+                #   => binary OR all the lines
+                bitmask = reduce(int.__or__, (lines[i] for i in self.indices))
+                self._column_indices = bit_indices(bitmask)
+            return self._column_indices
+
+        @property
+        def width(self):
+            # Add 1 because Class2=0 cannot be used but needs to be encoded.
+            return len(self.column_indices) + 1
+
+        @property
+        def cost(self):
+            if self._cost is None:
+                # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
+                self._cost = (
+                    # uint16	posFormat	Format identifier: format = 2
+                    2
+                    # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
+                    + 2
+                    + self.coverage_bytes
+                    # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
+                    + 2
+                    # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
+                    + 2
+                    # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
+                    + 2
+                    + self.classDef1_bytes
+                    # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
+                    + 2
+                    + self.classDef2_bytes
+                    # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
+                    + 2
+                    # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
+                    + 2
+                    # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
+                    + (valueFormat1_bytes + valueFormat2_bytes)
+                    * len(self.indices)
+                    * self.width
+                )
+            return self._cost
+
+        @property
+        def coverage_bytes(self):
+            format1_bytes = (
+                # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
+                # uint16	coverageFormat	Format identifier — format = 1
+                # uint16	glyphCount	Number of glyphs in the glyph array
+                4
+                # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
+                + sum(len(all_class1[i]) for i in self.indices) * 2
+            )
+            ranges = sorted(chain.from_iterable(all_class1_data[i][0] for i in self.indices))
+            merged_range_count = 0
+            last = None
+            for (start, end) in ranges:
+                if last is not None and start != last + 1:
+                    merged_range_count += 1
+                last = end
+            format2_bytes = (
+                # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
+                # uint16	coverageFormat	Format identifier — format = 2
+                # uint16	rangeCount	Number of RangeRecords
+                4
+                # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
+                # uint16	startGlyphID	First glyph ID in the range
+                # uint16	endGlyphID	Last glyph ID in the range
+                # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
+                + merged_range_count * 6
+            )
+            return min(format1_bytes, format2_bytes)
+
+        @property
+        def classDef1_bytes(self):
+            # We can skip encoding one of the Class1 definitions, and use
+            # Class1=0 to represent it instead, because Class1 is gated by the
+            # Coverage definition. Use Class1=0 for the highest byte savings.
+            # Going through all options takes too long, pick the biggest class
+            # = what happens in otlLib.builder.ClassDefBuilder.classes()
+            biggest_index = max(self.indices, key=lambda i: len(all_class1[i]))
+            return _classDef_bytes(
+                all_class1_data, [i for i in self.indices if i != biggest_index]
+            )
+
+        @property
+        def classDef2_bytes(self):
+            # All Class2 need to be encoded because we can't use Class2=0
+            return _classDef_bytes(all_class2_data, self.column_indices)
+
+        def merge(self, other: "Cluster") -> "Cluster":
+            return make_cluster(self.indices_bitmask | other.indices_bitmask)
+
+    cluster_cache: Dict[int, Cluster] = {}
+
+    def make_cluster(indices: int) -> Cluster:
+        cluster = cluster_cache.get(indices, None)
+        if cluster is not None:
+            return cluster
+        cluster = Cluster(indices)
+        cluster_cache[indices] = cluster
+        return cluster
+
+    clusters = [make_cluster(1 << i) for i in range(len(lines))]
+
+    # Cost of 1 cluster with everything
+    # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
+    cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
+    log.debug(f"        len(clusters) = {len(clusters)}")
+
+    while len(clusters) > 1:
+        lowest_cost_change = None
+        best_cluster_index = None
+        best_other_index = None
+        best_merged = None
+        for i, cluster in enumerate(clusters):
+            for j, other in enumerate(clusters[i + 1 :]):
+                merged = cluster.merge(other)
+                cost_change = merged.cost - cluster.cost - other.cost
+                if lowest_cost_change is None or cost_change < lowest_cost_change:
+                    lowest_cost_change = cost_change
+                    best_cluster_index = i
+                    best_other_index = i + 1 + j
+                    best_merged = merged
+        assert lowest_cost_change is not None
+        assert best_cluster_index is not None
+        assert best_other_index is not None
+        assert best_merged is not None
+
+        # If the best merge we found is still taking down the file size, then
+        # there's no question: we must do it, because it's beneficial in both
+        # ways (lower file size and lower number of subtables).  However, if the
+        # best merge we found is not reducing file size anymore, then we need to
+        # look at the other stop criteria = the compression factor.
+        if lowest_cost_change > 0:
+            # Stop critera: check whether we should keep merging.
+            # Compute size reduction brought by splitting
+            cost_after_splitting = sum(c.cost for c in clusters)
+            # size_reduction so that after = before * (1 - size_reduction)
+            # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
+            size_reduction = 1 - cost_after_splitting / cost_before_splitting
+
+            # Force more merging by taking into account the compression number.
+            # Target behaviour: compression number = 1 to 9, default 5 like gzip
+            #   - 1 = accept to add 1 subtable to reduce size by 50%
+            #   - 5 = accept to add 5 subtables to reduce size by 50%
+            # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
+            # Given the size reduction we have achieved so far, compute how many
+            # new subtables are acceptable.
+            max_new_subtables = -log2(1 - size_reduction) * compression
+            log.debug(
+                f"            len(clusters) = {len(clusters):3d}    size_reduction={size_reduction:5.2f}    max_new_subtables={max_new_subtables}",
+            )
+            if compression == 9:
+                # Override level 9 to mean: create any number of subtables
+                max_new_subtables = len(clusters)
+
+            # If we have managed to take the number of new subtables below the
+            # threshold, then we can stop.
+            if len(clusters) <= max_new_subtables + 1:
+                break
+
+        # No reason to stop yet, do the merge and move on to the next.
+        del clusters[best_other_index]
+        clusters[best_cluster_index] = best_merged
+
+    # All clusters are in done; turn bitmasks back into the "Pairs" format
+    pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
+    for pair, values in pairs.items():
+        pairs_by_class1[pair[0]][pair] = values
+    pairs_groups: List[Pairs] = []
+    for cluster in clusters:
+        pairs_group: Pairs = dict()
+        for i in cluster.indices:
+            class1 = all_class1[i]
+            pairs_group.update(pairs_by_class1[class1])
+        pairs_groups.append(pairs_group)
+    return pairs_groups
diff --git a/Lib/fontTools/varLib/merger.py b/Lib/fontTools/varLib/merger.py
index 888b52c26..4c60eba5c 100644
--- a/Lib/fontTools/varLib/merger.py
+++ b/Lib/fontTools/varLib/merger.py
@@ -1,8 +1,10 @@
 """
 Merge OpenType Layout tables (GDEF / GPOS / GSUB).
 """
+import os
 import copy
 from operator import ior
+import logging
 from fontTools.misc import classifyTools
 from fontTools.misc.roundTools import otRound
 from fontTools.ttLib.tables import otTables as ot
@@ -13,6 +15,13 @@ from fontTools.varLib.models import nonNone, allNone, allEqual, allEqualTo
 from fontTools.varLib.varStore import VarStoreInstancer
 from functools import reduce
 from fontTools.otlLib.builder import buildSinglePos
+from fontTools.otlLib.optimize.gpos import (
+    compact_pair_pos,
+    GPOS_COMPACT_MODE_DEFAULT,
+    GPOS_COMPACT_MODE_ENV_KEY,
+)
+
+log = logging.getLogger("fontTools.varLib.merger")
 
 from .errors import (
     ShouldBeConstant,
@@ -837,6 +846,13 @@ def merge(merger, self, lst):
 			self.SubTable.pop(-1)
 			self.SubTableCount -= 1
 
+		# Compact the merged subtables
+		mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
+		if mode and mode != "0":
+			log.info("Compacting GPOS...")
+			self.SubTable = compact_pair_pos(merger.font, mode, self.SubTable)
+			self.SubTableCount = len(self.SubTable)
+
 	elif isSinglePos and flattened:
 		singlePosTable = self.SubTable[0]
 		glyphs = singlePosTable.Coverage.glyphs
@@ -851,7 +867,6 @@ def merge(merger, self, lst):
 
 	del merger.lookup_subtables
 
-
 #
 # InstancerMerger
 #
diff --git a/Snippets/compact_gpos.py b/Snippets/compact_gpos.py
new file mode 100644
index 000000000..04d60d587
--- /dev/null
+++ b/Snippets/compact_gpos.py
@@ -0,0 +1,124 @@
+import argparse
+from collections import defaultdict
+import csv
+import time
+import sys
+from pathlib import Path
+from typing import Any, Iterable, List, Optional, Sequence, Tuple
+
+from fontTools.ttLib import TTFont
+from fontTools.otlLib.optimize import compact
+
+MODES = [str(c) for c in range(1, 10)]
+
+
+def main(args: Optional[List[str]] = None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument("fonts", type=Path, nargs="+", help="Path to TTFs.")
+    parsed_args = parser.parse_args(args)
+
+    runtimes = defaultdict(list)
+    rows = []
+    font_path: Path
+    for font_path in parsed_args.fonts:
+        font = TTFont(font_path)
+        if "GPOS" not in font:
+            print(f"No GPOS in {font_path.name}, skipping.", file=sys.stderr)
+            continue
+        size_orig = len(font.getTableData("GPOS")) / 1024
+        print(f"Measuring {font_path.name}...", file=sys.stderr)
+
+        fonts = {}
+        font_paths = {}
+        sizes = {}
+        for mode in MODES:
+            print(f"    Running mode={mode}", file=sys.stderr)
+            fonts[mode] = TTFont(font_path)
+            before = time.perf_counter()
+            compact(fonts[mode], mode=str(mode))
+            runtimes[mode].append(time.perf_counter() - before)
+            font_paths[mode] = (
+                font_path.parent
+                / "compact"
+                / (font_path.stem + f"_{mode}" + font_path.suffix)
+            )
+            font_paths[mode].parent.mkdir(parents=True, exist_ok=True)
+            fonts[mode].save(font_paths[mode])
+            fonts[mode] = TTFont(font_paths[mode])
+            sizes[mode] = len(fonts[mode].getTableData("GPOS")) / 1024
+
+        print(f"    Runtimes:", file=sys.stderr)
+        for mode, times in runtimes.items():
+            print(
+                f"        {mode:10} {' '.join(f'{t:5.2f}' for t in times)}",
+                file=sys.stderr,
+            )
+
+        # Bonus: measure WOFF2 file sizes.
+        print(f"    Measuring WOFF2 sizes", file=sys.stderr)
+        size_woff_orig = woff_size(font, font_path) / 1024
+        sizes_woff = {
+            mode: woff_size(fonts[mode], font_paths[mode]) / 1024 for mode in MODES
+        }
+
+        rows.append(
+            (
+                font_path.name,
+                size_orig,
+                size_woff_orig,
+                *flatten(
+                    (
+                        sizes[mode],
+                        pct(sizes[mode], size_orig),
+                        sizes_woff[mode],
+                        pct(sizes_woff[mode], size_woff_orig),
+                    )
+                    for mode in MODES
+                ),
+            )
+        )
+
+    write_csv(rows)
+
+
+def woff_size(font: TTFont, path: Path) -> int:
+    font.flavor = "woff2"
+    woff_path = path.with_suffix(".woff2")
+    font.save(woff_path)
+    return woff_path.stat().st_size
+
+
+def write_csv(rows: List[Tuple[Any]]) -> None:
+    sys.stdout.reconfigure(encoding="utf-8")
+    sys.stdout.write("\uFEFF")
+    writer = csv.writer(sys.stdout, lineterminator="\n")
+    writer.writerow(
+        [
+            "File",
+            "Original GPOS Size",
+            "Original WOFF2 Size",
+            *flatten(
+                (
+                    f"mode={mode}",
+                    f"Change {mode}",
+                    f"mode={mode} WOFF2 Size",
+                    f"Change {mode} WOFF2 Size",
+                )
+                for mode in MODES
+            ),
+        ]
+    )
+    for row in rows:
+        writer.writerow(row)
+
+
+def pct(new: float, old: float) -> float:
+    return -(1 - (new / old))
+
+
+def flatten(seq_seq: Iterable[Iterable[Any]]) -> List[Any]:
+    return [thing for seq in seq_seq for thing in seq]
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py
new file mode 100644
index 000000000..eeb69199b
--- /dev/null
+++ b/Tests/otlLib/optimize_test.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+from subprocess import run
+
+from fontTools.ttLib import TTFont, newTable
+from fontTools.feaLib.builder import addOpenTypeFeaturesFromString
+
+
+def test_main(tmpdir: Path):
+    """Check that calling the main function on an input TTF works."""
+    glyphs = ".notdef space A B C a b c".split()
+    features = """
+    lookup GPOS_EXT useExtension {
+        pos a b -10;
+    } GPOS_EXT;
+
+    feature kern {
+        pos A 20;
+        pos A B -50;
+        pos A B' 10 C;
+        lookup GPOS_EXT;
+    } kern;
+    """
+    font = TTFont()
+    font.setGlyphOrder(glyphs)
+    addOpenTypeFeaturesFromString(font, features)
+    font["maxp"] = maxp = newTable("maxp")
+    maxp.tableVersion = 0x00010000
+    maxp.maxZones = 1
+    maxp.maxTwilightPoints = 0
+    maxp.maxStorage = 0
+    maxp.maxFunctionDefs = 0
+    maxp.maxInstructionDefs = 0
+    maxp.maxStackElements = 0
+    maxp.maxSizeOfInstructions = 0
+    maxp.maxComponentElements = 0
+    maxp.maxPoints = 0
+    maxp.maxContours = 0
+    maxp.maxCompositePoints = 0
+    maxp.maxCompositeContours = 0
+    maxp.maxComponentDepth = 0
+    maxp.compile(font)
+    input = tmpdir / "in.ttf"
+    font.save(str(input))
+    output = tmpdir / "out.ttf"
+    run(["fonttools", "otlLib.optimize", str(input), "-o", str(output)], check=True)
+    assert output.exists()

From 604513a38f10736f2531633023744b0e60f7746c Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Wed, 30 Jun 2021 12:06:09 +0100
Subject: [PATCH 02/10] Fix a few comments

---
 Lib/fontTools/otlLib/builder.py       | 2 ++
 Lib/fontTools/otlLib/optimize/gpos.py | 2 +-
 Lib/fontTools/varLib/merger.py        | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Lib/fontTools/otlLib/builder.py b/Lib/fontTools/otlLib/builder.py
index b58a5f880..bfb9d41f8 100644
--- a/Lib/fontTools/otlLib/builder.py
+++ b/Lib/fontTools/otlLib/builder.py
@@ -1378,6 +1378,8 @@ class PairPosBuilder(LookupBuilder):
         lookup = self.buildLookup_(subtables)
 
         # Compact the lookup
+        # This is a good moment to do it because the compaction should create
+        # smaller subtables, which may prevent overflows from happening.
         mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
         if mode and mode != "0":
             log.info("Compacting GPOS...")
diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
index 6746233ed..1f6644263 100644
--- a/Lib/fontTools/otlLib/optimize/gpos.py
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -380,7 +380,7 @@ def cluster_pairs_by_class2_coverage_custom_cost(
         del clusters[best_other_index]
         clusters[best_cluster_index] = best_merged
 
-    # All clusters are in done; turn bitmasks back into the "Pairs" format
+    # All clusters are final; turn bitmasks back into the "Pairs" format
     pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
     for pair, values in pairs.items():
         pairs_by_class1[pair[0]][pair] = values
diff --git a/Lib/fontTools/varLib/merger.py b/Lib/fontTools/varLib/merger.py
index 4c60eba5c..aaf2a5130 100644
--- a/Lib/fontTools/varLib/merger.py
+++ b/Lib/fontTools/varLib/merger.py
@@ -847,6 +847,8 @@ def merge(merger, self, lst):
 			self.SubTableCount -= 1
 
 		# Compact the merged subtables
+		# This is a good moment to do it because the compaction should create
+		# smaller subtables, which may prevent overflows from happening.
 		mode = os.environ.get(GPOS_COMPACT_MODE_ENV_KEY, GPOS_COMPACT_MODE_DEFAULT)
 		if mode and mode != "0":
 			log.info("Compacting GPOS...")

From 3b34b228dd317210ced684d40b0779dbeec9a8c4 Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Wed, 30 Jun 2021 12:37:12 +0100
Subject: [PATCH 03/10] Fix fonttools otlLib.optimize command line

---
 Lib/fontTools/otlLib/optimize/__init__.py |  1 +
 Tests/otlLib/optimize_test.py             | 48 +++++++++--------------
 2 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/Lib/fontTools/otlLib/optimize/__init__.py b/Lib/fontTools/otlLib/optimize/__init__.py
index 22dcd9933..d39bb28d1 100644
--- a/Lib/fontTools/otlLib/optimize/__init__.py
+++ b/Lib/fontTools/otlLib/optimize/__init__.py
@@ -35,6 +35,7 @@ def main(args=None):
         ),
         default=int(GPOS_COMPACT_MODE_DEFAULT),
         choices=list(range(10)),
+        type=int,
     )
     logging_group = parser.add_mutually_exclusive_group(required=False)
     logging_group.add_argument(
diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py
index eeb69199b..c2f2468eb 100644
--- a/Tests/otlLib/optimize_test.py
+++ b/Tests/otlLib/optimize_test.py
@@ -1,46 +1,34 @@
 from pathlib import Path
 from subprocess import run
 
-from fontTools.ttLib import TTFont, newTable
 from fontTools.feaLib.builder import addOpenTypeFeaturesFromString
+from fontTools.fontBuilder import FontBuilder
 
 
 def test_main(tmpdir: Path):
     """Check that calling the main function on an input TTF works."""
-    glyphs = ".notdef space A B C a b c".split()
+    glyphs = ".notdef space A B".split()
     features = """
-    lookup GPOS_EXT useExtension {
-        pos a b -10;
-    } GPOS_EXT;
-
     feature kern {
-        pos A 20;
         pos A B -50;
-        pos A B' 10 C;
-        lookup GPOS_EXT;
     } kern;
     """
-    font = TTFont()
-    font.setGlyphOrder(glyphs)
-    addOpenTypeFeaturesFromString(font, features)
-    font["maxp"] = maxp = newTable("maxp")
-    maxp.tableVersion = 0x00010000
-    maxp.maxZones = 1
-    maxp.maxTwilightPoints = 0
-    maxp.maxStorage = 0
-    maxp.maxFunctionDefs = 0
-    maxp.maxInstructionDefs = 0
-    maxp.maxStackElements = 0
-    maxp.maxSizeOfInstructions = 0
-    maxp.maxComponentElements = 0
-    maxp.maxPoints = 0
-    maxp.maxContours = 0
-    maxp.maxCompositePoints = 0
-    maxp.maxCompositeContours = 0
-    maxp.maxComponentDepth = 0
-    maxp.compile(font)
+    fb = FontBuilder(1000)
+    fb.setupGlyphOrder(glyphs)
+    addOpenTypeFeaturesFromString(fb.font, features)
     input = tmpdir / "in.ttf"
-    font.save(str(input))
+    fb.save(str(input))
     output = tmpdir / "out.ttf"
-    run(["fonttools", "otlLib.optimize", str(input), "-o", str(output)], check=True)
+    run(
+        [
+            "fonttools",
+            "otlLib.optimize",
+            "--gpos-compact-mode",
+            "5",
+            str(input),
+            "-o",
+            str(output),
+        ],
+        check=True,
+    )
     assert output.exists()

From ef67839fdbb336ee16323ba7a6ce032d55c3786b Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Wed, 30 Jun 2021 14:04:13 +0100
Subject: [PATCH 04/10] Fix typing error

---
 Lib/fontTools/otlLib/optimize/__init__.py |  3 ++-
 Tests/otlLib/optimize_test.py             | 24 +++++++++++++++++++++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/Lib/fontTools/otlLib/optimize/__init__.py b/Lib/fontTools/otlLib/optimize/__init__.py
index d39bb28d1..5c007e891 100644
--- a/Lib/fontTools/otlLib/optimize/__init__.py
+++ b/Lib/fontTools/otlLib/optimize/__init__.py
@@ -51,7 +51,8 @@ def main(args=None):
     )
 
     font = TTFont(options.font)
-    compact(font, options.gpos_compact_mode)
+    # TODO: switch everything to have type(mode) = int when using the Config class
+    compact(font, str(options.gpos_compact_mode))
     font.save(options.outfile or options.font)
 
 
diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py
index c2f2468eb..4201aed09 100644
--- a/Tests/otlLib/optimize_test.py
+++ b/Tests/otlLib/optimize_test.py
@@ -7,10 +7,12 @@ from fontTools.fontBuilder import FontBuilder
 
 def test_main(tmpdir: Path):
     """Check that calling the main function on an input TTF works."""
-    glyphs = ".notdef space A B".split()
+    glyphs = ".notdef space A Aacute B D".split()
     features = """
+    @A = [A Aacute];
+    @B = [B D];
     feature kern {
-        pos A B -50;
+        pos @A @B -50;
     } kern;
     """
     fb = FontBuilder(1000)
@@ -32,3 +34,21 @@ def test_main(tmpdir: Path):
         check=True,
     )
     assert output.exists()
+
+
+def test_off_by_default(tmpdir: Path):
+    """Check that calling the main function on an input TTF works."""
+    glyphs = ".notdef space A B".split()
+    features = """
+    feature kern {
+        pos A B -50;
+    } kern;
+    """
+    fb = FontBuilder(1000)
+    fb.setupGlyphOrder(glyphs)
+    addOpenTypeFeaturesFromString(fb.font, features)
+    input = tmpdir / "in.ttf"
+    fb.save(str(input))
+    output = tmpdir / "out.ttf"
+    run(["fonttools", "otlLib.optimize", str(input), "-o", str(output)], check=True)
+    assert output.exists()

From 7860dd5fe88e9c35a2b49393ddba8a67c6cbc53b Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Wed, 30 Jun 2021 18:52:57 +0100
Subject: [PATCH 05/10] Add tests that optimize block matrices

---
 Lib/fontTools/otlLib/optimize/gpos.py |  19 +++-
 Tests/otlLib/optimize_test.py         | 148 +++++++++++++++++++++++---
 2 files changed, 148 insertions(+), 19 deletions(-)

diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
index 1f6644263..026744941 100644
--- a/Lib/fontTools/otlLib/optimize/gpos.py
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -128,7 +128,9 @@ def _getClassRanges(glyphIDs: Iterable[int]):
 
 # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
 def _classDef_bytes(
-    class_data: List[Tuple[List[Tuple[int, int]], int, int]], class_ids: List[int], coverage=False
+    class_data: List[Tuple[List[Tuple[int, int]], int, int]],
+    class_ids: List[int],
+    coverage=False,
 ):
     if not class_ids:
         return 0
@@ -152,6 +154,10 @@ def cluster_pairs_by_class2_coverage_custom_cost(
     pairs: Pairs,
     compression: int = 5,
 ) -> List[Pairs]:
+    if not pairs:
+        # The subtable was actually empty?
+        return [pairs]
+
     # Sorted for reproducibility/determinism
     all_class1 = sorted(set(pair[0] for pair in pairs))
     all_class2 = sorted(set(pair[1] for pair in pairs))
@@ -229,10 +235,13 @@ def cluster_pairs_by_class2_coverage_custom_cost(
         @property
         def cost(self):
             if self._cost is None:
-                # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
                 self._cost = (
-                    # uint16	posFormat	Format identifier: format = 2
+                    # 2 bytes to store the offset to this subtable in the Lookup table above
                     2
+                    # Contents of the subtable
+                    # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
+                    # uint16	posFormat	Format identifier: format = 2
+                    + 2
                     # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
                     + 2
                     + self.coverage_bytes
@@ -267,7 +276,9 @@ def cluster_pairs_by_class2_coverage_custom_cost(
                 # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
                 + sum(len(all_class1[i]) for i in self.indices) * 2
             )
-            ranges = sorted(chain.from_iterable(all_class1_data[i][0] for i in self.indices))
+            ranges = sorted(
+                chain.from_iterable(all_class1_data[i][0] for i in self.indices)
+            )
             merged_range_count = 0
             last = None
             for (start, end) in ranges:
diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py
index 4201aed09..db1cac5c3 100644
--- a/Tests/otlLib/optimize_test.py
+++ b/Tests/otlLib/optimize_test.py
@@ -1,9 +1,18 @@
+import logging
 from pathlib import Path
 from subprocess import run
+import contextlib
+import os
+from typing import List, Optional, Tuple
+from fontTools.ttLib import TTFont
+
+import pytest
 
 from fontTools.feaLib.builder import addOpenTypeFeaturesFromString
 from fontTools.fontBuilder import FontBuilder
 
+from fontTools.ttLib.tables.otBase import OTTableWriter, ValueRecord
+
 
 def test_main(tmpdir: Path):
     """Check that calling the main function on an input TTF works."""
@@ -36,19 +45,128 @@ def test_main(tmpdir: Path):
     assert output.exists()
 
 
-def test_off_by_default(tmpdir: Path):
-    """Check that calling the main function on an input TTF works."""
-    glyphs = ".notdef space A B".split()
-    features = """
-    feature kern {
-        pos A B -50;
-    } kern;
+# Copy-pasted from https://stackoverflow.com/questions/2059482/python-temporarily-modify-the-current-processs-environment
+# TODO: remove when moving to the Config class
+@contextlib.contextmanager
+def set_env(**environ):
     """
-    fb = FontBuilder(1000)
-    fb.setupGlyphOrder(glyphs)
-    addOpenTypeFeaturesFromString(fb.font, features)
-    input = tmpdir / "in.ttf"
-    fb.save(str(input))
-    output = tmpdir / "out.ttf"
-    run(["fonttools", "otlLib.optimize", str(input), "-o", str(output)], check=True)
-    assert output.exists()
+    Temporarily set the process environment variables.
+
+    >>> with set_env(PLUGINS_DIR=u'test/plugins'):
+    ...   "PLUGINS_DIR" in os.environ
+    True
+
+    >>> "PLUGINS_DIR" in os.environ
+    False
+
+    :type environ: dict[str, unicode]
+    :param environ: Environment variables to set
+    """
+    old_environ = dict(os.environ)
+    os.environ.update(environ)
+    try:
+        yield
+    finally:
+        os.environ.clear()
+        os.environ.update(old_environ)
+
+
+def count_pairpos_subtables(font: TTFont) -> int:
+    subtables = 0
+    for lookup in font["GPOS"].table.LookupList.Lookup:
+        if lookup.LookupType == 2:
+            subtables += len(lookup.SubTable)
+        elif lookup.LookupType == 9:
+            for subtable in lookup.SubTable:
+                if subtable.ExtensionLookupType == 2:
+                    subtables += 1
+    return subtables
+
+
+def count_pairpos_bytes(font: TTFont) -> int:
+    bytes = 0
+    gpos = font["GPOS"]
+    for lookup in font["GPOS"].table.LookupList.Lookup:
+        if lookup.LookupType == 2:
+            w = OTTableWriter(tableTag=gpos.tableTag)
+            lookup.compile(w, font)
+            bytes += len(w.getAllData())
+        elif lookup.LookupType == 9:
+            if any(subtable.ExtensionLookupType == 2 for subtable in lookup.SubTable):
+                w = OTTableWriter(tableTag=gpos.tableTag)
+                lookup.compile(w, font)
+                bytes += len(w.getAllData())
+    return bytes
+
+
+def get_kerning_by_blocks(blocks: List[Tuple[int, int]]) -> Tuple[List[str], str]:
+    """Generate a highly compressible font by generating a bunch of rectangular
+    blocks on the diagonal that can easily be sliced into subtables.
+
+    Returns the list of glyphs and feature code of the font.
+    """
+    value = 0
+    glyphs: List[str] = []
+    rules = []
+    # Each block is like a script in a multi-script font
+    for script, (width, height) in enumerate(blocks):
+        glyphs.extend(f"g_{script}_{i}" for i in range(max(width, height)))
+        for l in range(height):
+            for r in range(width):
+                value += 1
+                rules.append((f"g_{script}_{l}", f"g_{script}_{r}", value))
+    classes = "\n".join([f"@{g} = [{g}];" for g in glyphs])
+    statements = "\n".join([f"pos @{l} @{r} {v};" for (l, r, v) in rules])
+    features = f"""
+        {classes}
+        feature kern {{
+            {statements}
+        }} kern;
+    """
+    return glyphs, features
+
+
+@pytest.mark.parametrize(
+    ("blocks", "mode", "expected_subtables", "expected_bytes"),
+    [
+        # Mode = 0 = no optimization leads to 602 bytes of GPOS
+        ([(15, 3), (2, 10)], None, 1, 602),
+        # Optimization level 1 recognizes the 2 blocks and splits into 2
+        # subtables = adds 1 subtable leading to a size reduction of
+        # (602-298)/602 = 50%
+        ([(15, 3), (2, 10)], 1, 2, 298),
+        # On a bigger block configuration, we see that mode=5 doesn't create
+        # as many subtables as it could, because of the stop criteria
+        ([(4, 4) for _ in range(20)], 5, 14, 2042),
+        # while level=9 creates as many subtables as there were blocks on the
+        # diagonal and yields a better saving
+        ([(4, 4) for _ in range(20)], 9, 20, 1886),
+        # On a fully occupied kerning matrix, even the strategy 9 doesn't
+        # split anything.
+        ([(10, 10)], 9, 1, 304)
+    ],
+)
+def test_optimization_mode(
+    caplog,
+    blocks: List[Tuple[int, int]],
+    mode: Optional[int],
+    expected_subtables: int,
+    expected_bytes: int,
+):
+    """Check that the optimizations are off by default, and that increasing
+    the optimization level creates more subtables and a smaller byte size.
+    """
+    caplog.set_level(logging.DEBUG)
+
+    glyphs, features = get_kerning_by_blocks(blocks)
+    glyphs = [".notdef space"] + glyphs
+
+    env = {}
+    if mode is not None:
+        env["FONTTOOLS_GPOS_COMPACT_MODE"] = str(mode)
+    with set_env(**env):
+        fb = FontBuilder(1000)
+        fb.setupGlyphOrder(glyphs)
+        addOpenTypeFeaturesFromString(fb.font, features)
+        assert expected_subtables == count_pairpos_subtables(fb.font)
+        assert expected_bytes == count_pairpos_bytes(fb.font)

From d1c46f9d101389a176c67caa3c0524432f912289 Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Mon, 5 Jul 2021 15:03:10 +0100
Subject: [PATCH 06/10] Clarify comment with the plan vs the actual
 implementation

---
 Lib/fontTools/otlLib/optimize/gpos.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
index 026744941..b2097a96f 100644
--- a/Lib/fontTools/otlLib/optimize/gpos.py
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -16,12 +16,19 @@ log = logging.getLogger("fontTools.otlLib.optimize.gpos")
 
 
 def compact(font: TTFont, mode: str) -> TTFont:
-    # Plan:
+    # Ideal plan:
     #  1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
     #     https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
     #  2. Extract glyph-glyph kerning and class-kerning from all present subtables
     #  3. Regroup into different subtable arrangements
     #  4. Put back into the lookup
+    #
+    # Actual implementation:
+    #  2. Only class kerning is optimized currently
+    #  3. If the input kerning is already in several subtables, the subtables
+    #     are not grouped together first; instead each subtable is treated
+    #     independently, so currently this step is:
+    #     Split existing subtables into more smaller subtables
     gpos = font["GPOS"]
     for lookup in gpos.table.LookupList.Lookup:
         if lookup.LookupType == 2:

From 016aa4cccc572555d7d3feb7cde974aa8cb2fe25 Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Mon, 5 Jul 2021 15:12:40 +0100
Subject: [PATCH 07/10] Add docstring to snippet

---
 Snippets/compact_gpos.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Snippets/compact_gpos.py b/Snippets/compact_gpos.py
index 04d60d587..a5bd2f8b4 100644
--- a/Snippets/compact_gpos.py
+++ b/Snippets/compact_gpos.py
@@ -1,3 +1,23 @@
+#! /usr/bin/env python3
+
+"""
+Sample script to use the otlLib.optimize.gpos functions to compact GPOS tables
+of existing fonts. This script takes one or more TTF files as arguments and
+will create compacted copies of the fonts using all available modes of the GPOS
+compaction algorithm. For each copy, it will measure the new size of the GPOS
+table and also the new size of the font in WOFF2 format. All results will be
+printed to stdout in CSV format, so the savings provided by the algorithm in
+each mode can be inspected.
+
+This was initially made to debug the algorithm but can also be used to choose
+a mode value for a specific font (trade-off between bytes saved in TTF format
+vs more bytes in WOFF2 format and more subtables).
+
+Run:
+
+python Snippets/compact_gpos.py MyFont.ttf > results.csv
+"""
+
 import argparse
 from collections import defaultdict
 import csv

From 527179619bd3ba4792afb829c614b1f47d7a5d9b Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Mon, 5 Jul 2021 15:46:59 +0100
Subject: [PATCH 08/10] Refactor the Cluster class to be top-level instead of
 nested in the function

---
 Lib/fontTools/otlLib/optimize/gpos.py | 286 ++++++++++++++------------
 1 file changed, 155 insertions(+), 131 deletions(-)

diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
index b2097a96f..3d3f83908 100644
--- a/Lib/fontTools/otlLib/optimize/gpos.py
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -1,5 +1,5 @@
 import logging
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 from functools import reduce
 from itertools import chain
 from math import log2
@@ -156,6 +156,143 @@ def _classDef_bytes(
     return min(format1_bytes, format2_bytes)
 
 
+ClusteringContext = namedtuple(
+    "ClusteringContext",
+    [
+        "lines",
+        "all_class1",
+        "all_class1_data",
+        "all_class2_data",
+        "valueFormat1_bytes",
+        "valueFormat2_bytes",
+    ],
+)
+
+
+class Cluster:
+    # TODO(Python 3.7): Turn this into a dataclass
+    # ctx: ClusteringContext
+    # indices_bitmask: int
+    # Caches
+    # TODO(Python 3.8): use functools.cached_property instead of the
+    # manually cached properties, and remove the cache fields listed below.
+    # _indices: Optional[List[int]] = None
+    # _column_indices: Optional[List[int]] = None
+    # _cost: Optional[int] = None
+
+    __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"
+
+    def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
+        self.ctx = ctx
+        self.indices_bitmask = indices_bitmask
+        self._indices = None
+        self._column_indices = None
+        self._cost = None
+
+    @property
+    def indices(self):
+        if self._indices is None:
+            self._indices = bit_indices(self.indices_bitmask)
+        return self._indices
+
+    @property
+    def column_indices(self):
+        if self._column_indices is None:
+            # Indices of columns that have a 1 in at least 1 line
+            #   => binary OR all the lines
+            bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
+            self._column_indices = bit_indices(bitmask)
+        return self._column_indices
+
+    @property
+    def width(self):
+        # Add 1 because Class2=0 cannot be used but needs to be encoded.
+        return len(self.column_indices) + 1
+
+    @property
+    def cost(self):
+        if self._cost is None:
+            self._cost = (
+                # 2 bytes to store the offset to this subtable in the Lookup table above
+                2
+                # Contents of the subtable
+                # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
+                # uint16	posFormat	Format identifier: format = 2
+                + 2
+                # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
+                + 2
+                + self.coverage_bytes
+                # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
+                + 2
+                # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
+                + 2
+                # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
+                + 2
+                + self.classDef1_bytes
+                # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
+                + 2
+                + self.classDef2_bytes
+                # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
+                + 2
+                # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
+                + 2
+                # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
+                + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
+                * len(self.indices)
+                * self.width
+            )
+        return self._cost
+
+    @property
+    def coverage_bytes(self):
+        format1_bytes = (
+            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
+            # uint16	coverageFormat	Format identifier — format = 1
+            # uint16	glyphCount	Number of glyphs in the glyph array
+            4
+            # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
+            + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
+        )
+        ranges = sorted(
+            chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
+        )
+        merged_range_count = 0
+        last = None
+        for (start, end) in ranges:
+            if last is not None and start != last + 1:
+                merged_range_count += 1
+            last = end
+        format2_bytes = (
+            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
+            # uint16	coverageFormat	Format identifier — format = 2
+            # uint16	rangeCount	Number of RangeRecords
+            4
+            # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
+            # uint16	startGlyphID	First glyph ID in the range
+            # uint16	endGlyphID	Last glyph ID in the range
+            # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
+            + merged_range_count * 6
+        )
+        return min(format1_bytes, format2_bytes)
+
+    @property
+    def classDef1_bytes(self):
+        # We can skip encoding one of the Class1 definitions, and use
+        # Class1=0 to represent it instead, because Class1 is gated by the
+        # Coverage definition. Use Class1=0 for the highest byte savings.
+        # Going through all options takes too long, pick the biggest class
+        # = what happens in otlLib.builder.ClassDefBuilder.classes()
+        biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
+        return _classDef_bytes(
+            self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
+        )
+
+    @property
+    def classDef2_bytes(self):
+        # All Class2 need to be encoded because we can't use Class2=0
+        return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
+
+
 def cluster_pairs_by_class2_coverage_custom_cost(
     font: TTFont,
     pairs: Pairs,
@@ -196,134 +333,14 @@ def cluster_pairs_by_class2_coverage_custom_cost(
     valueFormat1_bytes = bit_count(format1) * 2
     valueFormat2_bytes = bit_count(format2) * 2
 
-    # Agglomerative clustering by hand, checking the cost gain of the new
-    # cluster against the previously separate clusters
-    # Start with 1 cluster per line
-    # cluster = set of lines = new subtable
-    # The class is here so it has a closure over the data above (lines, etc.)
-    class Cluster:
-        # TODO(Python 3.7): Turn this into a dataclass
-        # indices: int
-        # Caches
-        # TODO(Python 3.8): use functools.cached_property instead of the
-        # manually cached properties, and remove the cache fields listed below.
-        # _indices: Optional[List[int]] = None
-        # _column_indices: Optional[List[int]] = None
-        # _cost: Optional[int] = None
-
-        __slots__ = "indices_bitmask", "_indices", "_column_indices", "_cost"
-
-        def __init__(self, indices_bitmask: int):
-            self.indices_bitmask = indices_bitmask
-            self._indices = None
-            self._column_indices = None
-            self._cost = None
-
-        @property
-        def indices(self):
-            if self._indices is None:
-                self._indices = bit_indices(self.indices_bitmask)
-            return self._indices
-
-        @property
-        def column_indices(self):
-            if self._column_indices is None:
-                # Indices of columns that have a 1 in at least 1 line
-                #   => binary OR all the lines
-                bitmask = reduce(int.__or__, (lines[i] for i in self.indices))
-                self._column_indices = bit_indices(bitmask)
-            return self._column_indices
-
-        @property
-        def width(self):
-            # Add 1 because Class2=0 cannot be used but needs to be encoded.
-            return len(self.column_indices) + 1
-
-        @property
-        def cost(self):
-            if self._cost is None:
-                self._cost = (
-                    # 2 bytes to store the offset to this subtable in the Lookup table above
-                    2
-                    # Contents of the subtable
-                    # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
-                    # uint16	posFormat	Format identifier: format = 2
-                    + 2
-                    # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
-                    + 2
-                    + self.coverage_bytes
-                    # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
-                    + 2
-                    # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
-                    + 2
-                    # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
-                    + 2
-                    + self.classDef1_bytes
-                    # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
-                    + 2
-                    + self.classDef2_bytes
-                    # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
-                    + 2
-                    # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
-                    + 2
-                    # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
-                    + (valueFormat1_bytes + valueFormat2_bytes)
-                    * len(self.indices)
-                    * self.width
-                )
-            return self._cost
-
-        @property
-        def coverage_bytes(self):
-            format1_bytes = (
-                # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
-                # uint16	coverageFormat	Format identifier — format = 1
-                # uint16	glyphCount	Number of glyphs in the glyph array
-                4
-                # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
-                + sum(len(all_class1[i]) for i in self.indices) * 2
-            )
-            ranges = sorted(
-                chain.from_iterable(all_class1_data[i][0] for i in self.indices)
-            )
-            merged_range_count = 0
-            last = None
-            for (start, end) in ranges:
-                if last is not None and start != last + 1:
-                    merged_range_count += 1
-                last = end
-            format2_bytes = (
-                # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
-                # uint16	coverageFormat	Format identifier — format = 2
-                # uint16	rangeCount	Number of RangeRecords
-                4
-                # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
-                # uint16	startGlyphID	First glyph ID in the range
-                # uint16	endGlyphID	Last glyph ID in the range
-                # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
-                + merged_range_count * 6
-            )
-            return min(format1_bytes, format2_bytes)
-
-        @property
-        def classDef1_bytes(self):
-            # We can skip encoding one of the Class1 definitions, and use
-            # Class1=0 to represent it instead, because Class1 is gated by the
-            # Coverage definition. Use Class1=0 for the highest byte savings.
-            # Going through all options takes too long, pick the biggest class
-            # = what happens in otlLib.builder.ClassDefBuilder.classes()
-            biggest_index = max(self.indices, key=lambda i: len(all_class1[i]))
-            return _classDef_bytes(
-                all_class1_data, [i for i in self.indices if i != biggest_index]
-            )
-
-        @property
-        def classDef2_bytes(self):
-            # All Class2 need to be encoded because we can't use Class2=0
-            return _classDef_bytes(all_class2_data, self.column_indices)
-
-        def merge(self, other: "Cluster") -> "Cluster":
-            return make_cluster(self.indices_bitmask | other.indices_bitmask)
+    ctx = ClusteringContext(
+        lines,
+        all_class1,
+        all_class1_data,
+        all_class2_data,
+        valueFormat1_bytes,
+        valueFormat2_bytes,
+    )
 
     cluster_cache: Dict[int, Cluster] = {}
 
@@ -331,10 +348,17 @@ def cluster_pairs_by_class2_coverage_custom_cost(
         cluster = cluster_cache.get(indices, None)
         if cluster is not None:
             return cluster
-        cluster = Cluster(indices)
+        cluster = Cluster(ctx, indices)
         cluster_cache[indices] = cluster
         return cluster
 
+    def merge(cluster: Cluster, other: Cluster) -> Cluster:
+        return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
+
+    # Agglomerative clustering by hand: at each step, compare the cost of a
+    # merged cluster against the combined cost of its two separate clusters.
+    # Start with one cluster per line; each cluster is a set of lines that
+    # will become one new subtable.
     clusters = [make_cluster(1 << i) for i in range(len(lines))]
 
     # Cost of 1 cluster with everything
@@ -349,7 +373,7 @@ def cluster_pairs_by_class2_coverage_custom_cost(
         best_merged = None
         for i, cluster in enumerate(clusters):
             for j, other in enumerate(clusters[i + 1 :]):
-                merged = cluster.merge(other)
+                merged = merge(cluster, other)
                 cost_change = merged.cost - cluster.cost - other.cost
                 if lowest_cost_change is None or cost_change < lowest_cost_change:
                     lowest_cost_change = cost_change

From 2ff3159a5ff31c2d2195ba24d40837584655e9ce Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Mon, 5 Jul 2021 16:09:59 +0100
Subject: [PATCH 09/10] Mark the environment variable as experimental

---
 Lib/fontTools/otlLib/optimize/gpos.py | 3 +++
 Tests/otlLib/optimize_test.py         | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/Lib/fontTools/otlLib/optimize/gpos.py b/Lib/fontTools/otlLib/optimize/gpos.py
index 3d3f83908..79873fadb 100644
--- a/Lib/fontTools/otlLib/optimize/gpos.py
+++ b/Lib/fontTools/otlLib/optimize/gpos.py
@@ -9,6 +9,9 @@ from fontTools.misc.intTools import bit_count, bit_indices
 from fontTools.ttLib import TTFont
 from fontTools.ttLib.tables import otBase, otTables
 
+# NOTE: activating this optimization via the environment variable is
+# experimental and may not be supported once an alternative mechanism
+# is in place. See: https://github.com/fonttools/fonttools/issues/2349
 GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
 GPOS_COMPACT_MODE_DEFAULT = "0"
 
diff --git a/Tests/otlLib/optimize_test.py b/Tests/otlLib/optimize_test.py
index db1cac5c3..40cf389e3 100644
--- a/Tests/otlLib/optimize_test.py
+++ b/Tests/otlLib/optimize_test.py
@@ -163,6 +163,9 @@ def test_optimization_mode(
 
     env = {}
     if mode is not None:
+        # NOTE: activating this optimization via the environment variable is
+        # experimental and may not be supported once an alternative mechanism
+        # is in place. See: https://github.com/fonttools/fonttools/issues/2349
         env["FONTTOOLS_GPOS_COMPACT_MODE"] = str(mode)
     with set_env(**env):
         fb = FontBuilder(1000)

From 731845c1b8a93c108c0ef6f2fc948c43368ecb98 Mon Sep 17 00:00:00 2001
From: Jany Belluz <jany.belluz@daltonmaag.com>
Date: Mon, 5 Jul 2021 17:34:12 +0100
Subject: [PATCH 10/10] Update NEWS.rst

---
 NEWS.rst | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/NEWS.rst b/NEWS.rst
index 16dca63ee..9de1c6c36 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -15,6 +15,11 @@
 - [post] Fixed parsing ``post`` table format 2.0 when it contains extra garbage
   at the end of the stringData array (#2314).
 - [subset] drop empty features unless 'size' with FeatureParams table (#2324).
+- [otlLib] Added ``otlLib.optimize`` module; added GPOS compaction algorithm.
+  The compaction can be run on existing fonts with ``fonttools otlLib.optimize``
+  or using the snippet ``compact_gpos.py``. There's experimental support for
+  compacting fonts at compilation time using an environment variable, but that
+  might be removed later (#2326).
 
 4.24.4 (released 2021-05-25)
 ----------------------------
@@ -498,7 +503,7 @@
   instance, correctly map the value forward.
 - [varLib] The avar table can now contain mapping output values that are greater than
   OR EQUAL to the preceeding value, as the avar specification allows this.
-- [varLib] The errors of the module are now ordered hierarchically below VarLibError. 
+- [varLib] The errors of the module are now ordered hierarchically below VarLibError.
   See #1821.
 
 4.3.0 (released 2020-02-03)
@@ -792,13 +797,13 @@
 - [mutator] Set ``OVERLAP_SIMPLE`` and ``OVERLAP_COMPOUND`` glyf flags by
   default in ``instantiateVariableFont``. Added ``--no-overlap`` cli option
   to disable this (#1518).
-- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517).  
+- [subset] Fixed subsetting ``VVAR`` table (#1516, #1517).
   Fixed subsetting an ``HVAR`` table that has an ``AdvanceWidthMap`` when the
   option ``--retain-gids`` is used.
-- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511).  
-  Fixed double indentation of ``subtable`` statement (#1512).  
+- [feaLib] Added ``forceChained`` in MultipleSubstStatement (#1511).
+  Fixed double indentation of ``subtable`` statement (#1512).
   Added support for ``subtable`` statement in more places than just PairPos
-  lookups (#1520).  
+  lookups (#1520).
   Handle lookupflag 0 and lookupflag without a value (#1540).
 - [varLib] In ``load_designspace``, provide a default English name for the
   ``ital`` axis tag.