[varLib.varStore] Implement VarStore optimizer
On NotoSans-VF.ttf (a 5MB font), saves 15% total size by optimizing GDEF VarStore and resulting GPOS shrinkage.
This commit is contained in:
parent
073227bbb0
commit
3c4203d985
@ -7,6 +7,8 @@ from fontTools.varLib.builder import (buildVarRegionList, buildVarStore,
|
||||
buildVarRegion, buildVarData,
|
||||
VarData_CalculateNumShorts)
|
||||
from functools import partial
|
||||
from collections import defaultdict
|
||||
from array import array
|
||||
|
||||
|
||||
def _getLocationKey(loc):
|
||||
@ -230,19 +232,210 @@ ot.GSUB.remap_device_varidxes = Object_remap_device_varidxes
|
||||
ot.GPOS.remap_device_varidxes = Object_remap_device_varidxes
|
||||
|
||||
|
||||
class _Encoding(object):
|
||||
|
||||
def __init__(self, chars):
|
||||
self.chars = chars
|
||||
self.width = self._popcount(chars)
|
||||
self.overhead = self._characteristic_overhead(chars)
|
||||
self.items = []
|
||||
|
||||
def append(self, row):
|
||||
self.items.append(row)
|
||||
|
||||
def extend(self, lst):
|
||||
self.items.extend(lst)
|
||||
|
||||
def get_room(self):
|
||||
"""Maximum number of bytes that can be added to characteristic
|
||||
while still being beneficial to merge it into another one."""
|
||||
count = len(self.items)
|
||||
return max(0, (self.overhead - 1) // count - self.width)
|
||||
room = property(get_room)
|
||||
|
||||
@property
|
||||
def gain(self):
|
||||
"""Maximum possible byte gain from merging this into another
|
||||
characteristic."""
|
||||
count = len(self.items)
|
||||
return max(0, self.overhead - count * (self.width + 1))
|
||||
|
||||
def sort_key(self):
|
||||
return self.width, self.chars
|
||||
|
||||
def __len__(self):
|
||||
return len(self.items)
|
||||
|
||||
def can_encode(self, chars):
|
||||
return not (chars & ~self.chars)
|
||||
|
||||
def __sub__(self, other):
|
||||
return self._popcount(self.chars & ~other.chars)
|
||||
|
||||
@staticmethod
|
||||
def _popcount(n):
|
||||
# Apparently this is the fastest native way to do it...
|
||||
# https://stackoverflow.com/a/9831671
|
||||
return bin(n).count('1')
|
||||
|
||||
@staticmethod
|
||||
def _characteristic_overhead(chars):
|
||||
"""Returns overhead in bytes of encoding this characteristic
|
||||
as a VarData."""
|
||||
c = 6
|
||||
while chars:
|
||||
if chars & 3:
|
||||
c += 2
|
||||
chars >>= 2
|
||||
return c
|
||||
|
||||
|
||||
def _find_yourself_best_new_encoding(self, done_by_width):
|
||||
self.best_new_encoding = None
|
||||
for new_width in range(self.width+1, self.width+self.room+1):
|
||||
for new_encoding in done_by_width[new_width]:
|
||||
if new_encoding.can_encode(self.chars):
|
||||
break
|
||||
else:
|
||||
new_encoding = None
|
||||
self.best_new_encoding = new_encoding
|
||||
|
||||
|
||||
class _EncodingDict(dict):
|
||||
|
||||
def __missing__(self, chars):
|
||||
r = self[chars] = _Encoding(chars)
|
||||
return r
|
||||
|
||||
def add_row(self, row):
|
||||
chars = self._row_characteristics(row)
|
||||
self[chars].append(row)
|
||||
|
||||
@staticmethod
|
||||
def _row_characteristics(row):
|
||||
"""Returns encoding characteristics for a row."""
|
||||
chars = 0
|
||||
i = 1
|
||||
for v in row:
|
||||
if v:
|
||||
chars += i
|
||||
if not (-128 <= v <= 127):
|
||||
chars += i * 2
|
||||
i <<= 2
|
||||
return chars
|
||||
|
||||
|
||||
def VarStore_optimize(self):
|
||||
"""Optimize storage. Returns mapping from old VarIdxes to new ones."""
|
||||
|
||||
# TODO
|
||||
# Check that no two VarRegions are the same; if they are, fold them.
|
||||
# Also that same VarData does not reference same VarRegion more than once...
|
||||
|
||||
mapping = {}
|
||||
n = len(self.VarRegionList.Region) # Number of columns
|
||||
zeroes = array('h', [0]*n)
|
||||
|
||||
front_mapping = {} # Map from old VarIdxes to full row tuples
|
||||
|
||||
encodings = _EncodingDict()
|
||||
|
||||
# Collect all items into a set of full rows (with lots of zeroes.)
|
||||
for major,data in enumerate(self.VarData):
|
||||
for minor,row in enumerate(data.Item):
|
||||
mapping[(major<<16)+minor] = (major<<16)+minor
|
||||
regionIndices = data.VarRegionIndex
|
||||
|
||||
for minor,item in enumerate(data.Item):
|
||||
|
||||
row = array('h', zeroes)
|
||||
for regionIdx,v in zip(regionIndices, item):
|
||||
row[regionIdx] += v
|
||||
row = tuple(row)
|
||||
|
||||
encodings.add_row(row)
|
||||
front_mapping[(major<<16)+minor] = row
|
||||
|
||||
# Separate encodings that have no gain (are decided) and those having
|
||||
# possible gain (possibly to be merged into others.)
|
||||
encodings = sorted(encodings.values(), key=_Encoding.__len__, reverse=True)
|
||||
done_by_width = defaultdict(list)
|
||||
todo = []
|
||||
for encoding in encodings:
|
||||
if not encoding.gain:
|
||||
done_by_width[encoding.width].append(encoding)
|
||||
else:
|
||||
todo.append(encoding)
|
||||
|
||||
# For each encoding that is possibly to be merged, find the best match
|
||||
# in the decided encodings, and record that.
|
||||
todo.sort(key=_Encoding.get_room)
|
||||
for encoding in todo:
|
||||
encoding._find_yourself_best_new_encoding(done_by_width)
|
||||
|
||||
# Walk through todo encodings, for each, see if merging it with
|
||||
# another todo encoding gains more than each of them merging with
|
||||
# their best decided encoding. If yes, merge them and add resulting
|
||||
# encoding back to todo queue. If not, move the enconding to decided
|
||||
# list. Repeat till done.
|
||||
while todo:
|
||||
encoding = todo.pop()
|
||||
best_idx = None
|
||||
best_gain = 0
|
||||
for i,other_encoding in enumerate(todo):
|
||||
combined_chars = other_encoding.chars | encoding.chars
|
||||
combined_width = _Encoding._popcount(combined_chars)
|
||||
combined_overhead = _Encoding._characteristic_overhead(combined_chars)
|
||||
combined_gain = (
|
||||
+ encoding.overhead
|
||||
+ other_encoding.overhead
|
||||
- combined_overhead
|
||||
- (combined_width - encoding.width) * len(encoding)
|
||||
- (combined_width - other_encoding.width) * len(other_encoding)
|
||||
)
|
||||
this_gain = 0 if encoding.best_new_encoding is None else (
|
||||
+ encoding.overhead
|
||||
- (encoding.best_new_encoding.width - encoding.width) * len(encoding)
|
||||
)
|
||||
other_gain = 0 if other_encoding.best_new_encoding is None else (
|
||||
+ other_encoding.overhead
|
||||
- (other_encoding.best_new_encoding.width - other_encoding.width) * len(other_encoding)
|
||||
)
|
||||
separate_gain = this_gain + other_gain
|
||||
|
||||
if combined_gain > separate_gain:
|
||||
best_idx = i
|
||||
best_gain = combined_gain - separate_gain
|
||||
|
||||
if best_idx is None:
|
||||
# Encoding is decided as is
|
||||
done_by_width[encoding.width].append(encoding)
|
||||
else:
|
||||
other_encoding = todo[best_idx]
|
||||
combined_chars = other_encoding.chars | encoding.chars
|
||||
combined_encoding = _Encoding(combined_chars)
|
||||
combined_encoding.extend(encoding.items)
|
||||
combined_encoding.extend(other_encoding.items)
|
||||
combined_encoding._find_yourself_best_new_encoding(done_by_width)
|
||||
del todo[best_idx]
|
||||
todo.append(combined_encoding)
|
||||
|
||||
# Assemble final store.
|
||||
back_mapping = {} # Mapping from full rows to new VarIdxes
|
||||
encodings = sum(done_by_width.values(), [])
|
||||
encodings.sort(key=_Encoding.sort_key)
|
||||
self.VarData = []
|
||||
for major,encoding in enumerate(encodings):
|
||||
data = ot.VarData()
|
||||
self.VarData.append(data)
|
||||
data.VarRegionIndex = range(n)
|
||||
data.VarRegionCount = len(data.VarRegionIndex)
|
||||
data.Item = sorted(encoding.items)
|
||||
for minor,item in enumerate(data.Item):
|
||||
back_mapping[item] = (major<<16)+minor
|
||||
|
||||
# Compile final mapping.
|
||||
varidx_map = {}
|
||||
for k,v in front_mapping.items():
|
||||
varidx_map[k] = back_mapping[v]
|
||||
|
||||
# Remove unused regions.
|
||||
self.prune_regions()
|
||||
|
||||
# Recalculate things and go home.
|
||||
@ -252,7 +445,7 @@ def VarStore_optimize(self):
|
||||
data.ItemCount = len(data.Item)
|
||||
VarData_CalculateNumShorts(data)
|
||||
|
||||
return mapping
|
||||
return varidx_map
|
||||
|
||||
ot.VarStore.optimize = VarStore_optimize
|
||||
|
||||
@ -283,7 +476,13 @@ def main(args=None):
|
||||
#size = len(writer.getAllData())
|
||||
#print("Before: %7d bytes" % size)
|
||||
|
||||
store.optimize()
|
||||
varidx_map = store.optimize()
|
||||
|
||||
gdef.table.remap_device_varidxes(varidx_map)
|
||||
if 'GSUB' in font:
|
||||
font['GSUB'].table.remap_device_varidxes(varidx_map)
|
||||
if 'GPOS' in font:
|
||||
font['GPOS'].table.remap_device_varidxes(varidx_map)
|
||||
|
||||
writer = OTTableWriter()
|
||||
store.compile(writer, font)
|
||||
|
Loading…
x
Reference in New Issue
Block a user