From 548ffda37bf6a36c4bd3f45255d178d371bb9cf9 Mon Sep 17 00:00:00 2001
From: Cosimo Lupo
Date: Thu, 6 Feb 2025 19:01:31 +0000
Subject: [PATCH 1/3] [varStore] use the same sorting for both input/output

When the VarStore.optimize() algorithm was improved with the addition of a
priority queue, where pairs of encodings are sorted by decreasing gain from
merging one into the other (specifically in
https://github.com/fonttools/fonttools/commit/47ec18f788), the pre-sorting
step of the todo list (before the queue itself was populated) was kept to
make sure the algorithm produces stable results no matter the order of the
inputs.

The optimizer's output was itself sorted, again for stability, but using a
different key function, width_sort_key(), from the one used on the input
todo list, gain_sort_key().

A distinct gain_sort_key, where encodings are sorted by maximum theoretical
gain (decreasing, initially, when reverse=True was set, then increasing
after reverse was dropped at some point), is a remnant of the previous
algorithm and is no longer needed now that a priority queue is used, which
sorts by the actual gains from merging specific pairs of encodings.

I propose we keep some pre-sorting to ensure stability (in case the
priority queue initially contains multiple pairs with the same gain), but
use the same width_sort_key() that is used at the end.

Note this doesn't change the overall level of compression achieved by the
optimizer, but it makes the algorithm a bit less complicated and easier to
match in our alternative Rust implementation.
---
 Lib/fontTools/varLib/varStore.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/Lib/fontTools/varLib/varStore.py b/Lib/fontTools/varLib/varStore.py
index f54fad2db..b8ec8cd92 100644
--- a/Lib/fontTools/varLib/varStore.py
+++ b/Lib/fontTools/varLib/varStore.py
@@ -554,7 +554,11 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
     #
     # - Put all encodings into a "todo" list.
     #
-    # - Sort todo list by decreasing gain (for stability).
+    # - Sort todo list (for stability) by width_sort_key(), which is a tuple
+    #   of the following items:
+    #   * The "width" of the encoding.
+    #   * The characteristic bitmap of the encoding, with higher-numbered
+    #     columns compared first.
     #
     # - Make a priority-queue of the gain from combining each two
     #   encodings in the todo list. The priority queue is sorted by
@@ -575,16 +579,7 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
     #
     # The output is then sorted for stability, in the following way:
     # - The VarRegionList of the input is kept intact.
-    # - All encodings are sorted before the main algorithm, by
-    #   gain_key_sort(), which is a tuple of the following items:
-    #   * The gain of the encoding.
-    #   * The characteristic bitmap of the encoding, with higher-numbered
-    #     columns compared first.
-    # - The VarData is sorted by width_sort_key(), which is a tuple
-    #   of the following items:
-    #   * The "width" of the encoding.
-    #   * The characteristic bitmap of the encoding, with higher-numbered
-    #     columns compared first.
+    # - The VarData is sorted by the same width_sort_key() used at the beginning.
     # - Within each VarData, the items are sorted as vectors of numbers.
     #
     # Finally, each VarData is optimized to remove the empty columns and
@@ -626,7 +621,7 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
             front_mapping[(major << 16) + minor] = row
 
     # Prepare for the main algorithm.
-    todo = sorted(encodings.values(), key=_Encoding.gain_sort_key)
+    todo = sorted(encodings.values(), key=_Encoding.width_sort_key)
     del encodings
 
     # Repeatedly pick two best encodings to combine, and combine them.

From d4565155b8d265030c10886d60008a7297ae7a0c Mon Sep 17 00:00:00 2001
From: Cosimo Lupo
Date: Thu, 6 Feb 2025 19:11:30 +0000
Subject: [PATCH 2/3] [varStore] remove unused _Encoding methods

---
 Lib/fontTools/varLib/varStore.py | 27 ++------------------------
 1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/Lib/fontTools/varLib/varStore.py b/Lib/fontTools/varLib/varStore.py
index b8ec8cd92..21b338e10 100644
--- a/Lib/fontTools/varLib/varStore.py
+++ b/Lib/fontTools/varLib/varStore.py
@@ -412,25 +412,6 @@ class _Encoding(object):
     def extend(self, lst):
         self.items.update(lst)
 
-    def get_room(self):
-        """Maximum number of bytes that can be added to characteristic
-        while still being beneficial to merge it into another one."""
-        count = len(self.items)
-        return max(0, (self.overhead - 1) // count - self.width)
-
-    room = property(get_room)
-
-    def get_gain(self):
-        """Maximum possible byte gain from merging this into another
-        characteristic."""
-        count = len(self.items)
-        return max(0, self.overhead - count)
-
-    gain = property(get_gain)
-
-    def gain_sort_key(self):
-        return self.gain, self.chars
-
     def width_sort_key(self):
         return self.width, self.chars
 
@@ -534,13 +515,9 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
     # of the old encoding is completely eliminated. However, each row
     # now would require more bytes to encode, to the tune of one byte
     # per characteristic bit that is active in the new encoding but not
-    # in the old one. The number of bits that can be added to an encoding
-    # while still beneficial to merge it into another encoding is called
-    # the "room" for that encoding.
+    # in the old one.
     #
-    # The "gain" of an encodings is the maximum number of bytes we can
-    # save by merging it into another encoding. The "gain" of merging
-    # two encodings is how many bytes we save by doing so.
+    # The "gain" of merging two encodings is how many bytes we save by doing so.
     #
     # High-level algorithm:
     #

From 0068f78b9a8b19abea6a0c6240191b67c47a2fa8 Mon Sep 17 00:00:00 2001
From: Cosimo Lupo
Date: Fri, 7 Feb 2025 11:06:57 +0000
Subject: [PATCH 3/3] varStore_test: shuffle input order to test algorithm is stable

---
 Tests/varLib/varStore_test.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/Tests/varLib/varStore_test.py b/Tests/varLib/varStore_test.py
index 7eb9d740b..a89fda360 100644
--- a/Tests/varLib/varStore_test.py
+++ b/Tests/varLib/varStore_test.py
@@ -1,4 +1,5 @@
 import pytest
+import random
 from io import StringIO
 from fontTools.misc.xmlWriter import XMLWriter
 from fontTools.misc.roundTools import noRound
@@ -45,6 +46,17 @@ from fontTools.ttLib.tables.otTables import VarStore
                 [100, 22000, 4000, 173000],
             ],
         ),
+        (
+            [{}, {"a": 1}, {"b": 1}, {"a": 1, "b": 1}],
+            [
+                [random.randint(-128, 127) for _ in range(4)],
+                [random.randint(-128, 127) for _ in range(4)],
+                [random.randint(-128, 127) for _ in range(4)],
+                [random.randint(-32768, 32767) for _ in range(4)],
+                [random.randint(-32768, 32767) for _ in range(4)],
+                [random.randint(-32768, 32767) for _ in range(4)],
+            ],
+        ),
     ],
 )
 def test_onlineVarStoreBuilder(locations, masterValues):
@@ -53,6 +65,8 @@ def test_onlineVarStoreBuilder(locations, masterValues):
     builder = OnlineVarStoreBuilder(axisTags)
     builder.setModel(model)
     varIdxs = []
+    # shuffle input order to ensure optimizer produces stable results
+    random.shuffle(masterValues)
     for masters in masterValues:
         _, varIdx = builder.storeMasters(masters)
         varIdxs.append(varIdx)
@@ -178,6 +192,7 @@ def test_optimize(numRegions, varData, expectedNumVarData, expectedBytes):
     builder = OnlineVarStoreBuilder(axisTags)
     builder.setModel(model)
 
+    random.shuffle(varData)
     for data in varData:
         if type(data) is dict:
             newData = [0] * numRegions
@@ -240,6 +255,7 @@ def test_quantize(quantization, expectedBytes):
     builder = OnlineVarStoreBuilder(axisTags)
     builder.setModel(model)
 
+    random.shuffle(varData)
     for data in varData:
         builder.storeMasters(data)
 
@@ -268,7 +284,9 @@ def test_optimize_overflow():
     builder = OnlineVarStoreBuilder(axisTags)
     builder.setModel(model)
 
-    for data in range(0, 0xFFFF * 2):
+    varData = list(range(0, 0xFFFF * 2))
+    random.shuffle(varData)
+    for data in varData:
         data = [0, data]
         builder.storeMasters(data, round=noRound)
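
As an illustration of the scheme described in PATCH 1/3, here is a rough,
self-contained sketch. It is not the fontTools implementation: ToyEncoding,
OVERHEAD, and the gain arithmetic are invented stand-ins for _Encoding and the
real byte-cost model, and the queue is rebuilt from scratch each round rather
than updated incrementally. What it does show is the point of this series: a
single key function, width_sort_key(), orders both the input todo list and the
final output, while the priority queue itself is ordered by the actual gain of
merging a specific pair of encodings.

import heapq
import random
from itertools import combinations

OVERHEAD = 10  # invented per-encoding overhead, in "bytes"


class ToyEncoding:
    def __init__(self, chars, n_items):
        self.chars = chars      # characteristic bitmap of used columns
        self.n_items = n_items  # number of rows sharing this characteristic

    @property
    def width(self):
        return bin(self.chars).count("1")

    def width_sort_key(self):
        # Higher-numbered columns are the high bits of the bitmap integer,
        # so they dominate the tuple comparison.
        return self.width, self.chars

    def merge_gain(self, other):
        # Merging eliminates one encoding's overhead, but every column added
        # to either side costs one byte per affected row.
        width = bin(self.chars | other.chars).count("1")
        return (
            OVERHEAD
            - (width - self.width) * self.n_items
            - (width - other.width) * other.n_items
        )


def toy_optimize(encodings):
    # Same deterministic pre-sort as the output sort below: if the queue ever
    # holds several pairs with identical gain, ties are resolved by this
    # ordering rather than by the caller's input order.
    todo = sorted(encodings, key=ToyEncoding.width_sort_key)

    while True:
        # Entries are (negative gain, i, j) so heapq pops the largest gain first.
        queue = []
        for i, j in combinations(range(len(todo)), 2):
            gain = todo[i].merge_gain(todo[j])
            if gain > 0:
                queue.append((-gain, i, j))
        if not queue:
            break
        heapq.heapify(queue)
        _, i, j = heapq.heappop(queue)
        a, b = todo[i], todo[j]
        todo[j] = ToyEncoding(a.chars | b.chars, a.n_items + b.n_items)
        del todo[i]  # i < j, so the merged entry (formerly at j) stays in the list

    # The output ordering reuses the very same key as the input pre-sort.
    return sorted(todo, key=ToyEncoding.width_sort_key)


# Mirroring the shuffle added in PATCH 3/3: the result is independent of the
# input order.
encs = [ToyEncoding(0b0011, 2), ToyEncoding(0b0111, 1), ToyEncoding(0b1100, 3)]
baseline = [e.width_sort_key() for e in toy_optimize(encs)]
for _ in range(10):
    random.shuffle(encs)
    assert [e.width_sort_key() for e in toy_optimize(encs)] == baseline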