[varStore.optimize] Speed up

This commit is contained in:
Behdad Esfahbod 2023-05-25 08:04:24 -06:00
parent f12f3d863c
commit 9cbde09cdc

View File

@@ -361,7 +361,8 @@ class _Encoding(object):
def __init__(self, chars): def __init__(self, chars):
self.chars = chars self.chars = chars
self.width = self._popcount(chars) self.width = self._popcount(chars)
self.overhead = self._characteristic_overhead(chars) self.columns = self._columns(chars)
self.overhead = self._characteristic_overhead(self.columns)
self.items = set() self.items = set()
def append(self, row): def append(self, row):
@@ -408,20 +409,29 @@ class _Encoding(object):
return bin(n).count("1") return bin(n).count("1")
@staticmethod @staticmethod
def _characteristic_overhead(chars): def _characteristic_overhead(columns):
"""Returns overhead in bytes of encoding this characteristic """Returns overhead in bytes of encoding this characteristic
as a VarData.""" as a VarData."""
c = 4 + 6 # 4 bytes for LOffset, 6 bytes for VarData header c = 4 + 6 # 4 bytes for LOffset, 6 bytes for VarData header
c += len(columns) * 2
return c
@staticmethod
def _columns(chars):
cols = set()
i = 0
while chars: while chars:
if chars & 0b1111: if chars & 0b1111:
c += 2 cols.add(i)
chars >>= 4 chars >>= 4
return c i += 1
return cols
def gain_from_merging(self, other_encoding): def gain_from_merging(self, other_encoding):
combined_chars = other_encoding.chars | self.chars combined_chars = other_encoding.chars | self.chars
combined_width = _Encoding._popcount(combined_chars) combined_width = _Encoding._popcount(combined_chars)
combined_overhead = _Encoding._characteristic_overhead(combined_chars) combined_columns = self.columns | other_encoding.columns
combined_overhead = _Encoding._characteristic_overhead(combined_columns)
combined_gain = ( combined_gain = (
+self.overhead +self.overhead
+ other_encoding.overhead + other_encoding.overhead
@@ -577,7 +587,6 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
todo = sorted(encodings.values(), key=_Encoding.gain_sort_key) todo = sorted(encodings.values(), key=_Encoding.gain_sort_key)
del encodings del encodings
# Repeatedly pick two best encodings to combine, and combine them. # Repeatedly pick two best encodings to combine, and combine them.
heap = [] heap = []