From 548ffda37bf6a36c4bd3f45255d178d371bb9cf9 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Thu, 6 Feb 2025 19:01:31 +0000 Subject: [PATCH] [varStore] use the same sorting for both input/output When the VarStore.optimize() algorithm was improved with the addition of a priority queue where pairs of encodings are sorted by decreasing gain (from merging one into the other), specifically with this commit https://github.com/fonttools/fonttools/commit/47ec18f788, the pre-sorting step of the todo list (before the queue itself was populated) was kept to make sure the algorithm produces stable results no matter the order of the inputs. The optimizer's output was itself sorted, again for stability, but using a different key function, width_sort_key(), from the one used on the input todo list, gain_sort_key(). The rationale for a distinct gain_sort_key, where encodings get sorted by maximum theoretical gain (decreasing, initially, when reverse=True was set, then increasing as reverse was somehow dropped), no longer applies now that a priority queue is used (which sorts by actual gains from merging specific pairs of encodings); it is a remnant of the previous algorithm. I propose we keep some pre-sorting to ensure stability (in case the priority queue initially contains multiple pairs with the same gain), but we use the same width_sort_key that is used at the end. Note this doesn't change the overall level of compression achieved by the optimizer, but makes the algorithm a bit less complicated, and easier to match in our alternative Rust implementation. 
--- Lib/fontTools/varLib/varStore.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/Lib/fontTools/varLib/varStore.py b/Lib/fontTools/varLib/varStore.py index f54fad2db..b8ec8cd92 100644 --- a/Lib/fontTools/varLib/varStore.py +++ b/Lib/fontTools/varLib/varStore.py @@ -554,7 +554,11 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1): # # - Put all encodings into a "todo" list. # - # - Sort todo list by decreasing gain (for stability). + # - Sort todo list (for stability) by width_sort_key(), which is a tuple + # of the following items: + # * The "width" of the encoding. + # * The characteristic bitmap of the encoding, with higher-numbered + # columns compared first. # # - Make a priority-queue of the gain from combining each two # encodings in the todo list. The priority queue is sorted by @@ -575,16 +579,7 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1): # # The output is then sorted for stability, in the following way: # - The VarRegionList of the input is kept intact. - # - All encodings are sorted before the main algorithm, by - # gain_key_sort(), which is a tuple of the following items: - # * The gain of the encoding. - # * The characteristic bitmap of the encoding, with higher-numbered - # columns compared first. - # - The VarData is sorted by width_sort_key(), which is a tuple - # of the following items: - # * The "width" of the encoding. - # * The characteristic bitmap of the encoding, with higher-numbered - # columns compared first. + # - The VarData is sorted by the same width_sort_key() used at the beginning. # - Within each VarData, the items are sorted as vectors of numbers. # # Finally, each VarData is optimized to remove the empty columns and @@ -626,7 +621,7 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1): front_mapping[(major << 16) + minor] = row # Prepare for the main algorithm. 
- todo = sorted(encodings.values(), key=_Encoding.gain_sort_key) + todo = sorted(encodings.values(), key=_Encoding.width_sort_key) del encodings # Repeatedly pick two best encodings to combine, and combine them.