[varLib.varStore] Implement VarStore optimizer
On NotoSans-VF.ttf (a 5MB font), saves 15% of total size by optimizing the GDEF VarStore, with resulting GPOS shrinkage.
This commit is contained in:
parent
073227bbb0
commit
3c4203d985
@ -7,6 +7,8 @@ from fontTools.varLib.builder import (buildVarRegionList, buildVarStore,
|
|||||||
buildVarRegion, buildVarData,
|
buildVarRegion, buildVarData,
|
||||||
VarData_CalculateNumShorts)
|
VarData_CalculateNumShorts)
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from collections import defaultdict
|
||||||
|
from array import array
|
||||||
|
|
||||||
|
|
||||||
def _getLocationKey(loc):
|
def _getLocationKey(loc):
|
||||||
@ -230,19 +232,210 @@ ot.GSUB.remap_device_varidxes = Object_remap_device_varidxes
|
|||||||
ot.GPOS.remap_device_varidxes = Object_remap_device_varidxes
|
ot.GPOS.remap_device_varidxes = Object_remap_device_varidxes
|
||||||
|
|
||||||
|
|
||||||
|
class _Encoding(object):
|
||||||
|
|
||||||
|
def __init__(self, chars):
|
||||||
|
self.chars = chars
|
||||||
|
self.width = self._popcount(chars)
|
||||||
|
self.overhead = self._characteristic_overhead(chars)
|
||||||
|
self.items = []
|
||||||
|
|
||||||
|
def append(self, row):
|
||||||
|
self.items.append(row)
|
||||||
|
|
||||||
|
def extend(self, lst):
|
||||||
|
self.items.extend(lst)
|
||||||
|
|
||||||
|
def get_room(self):
|
||||||
|
"""Maximum number of bytes that can be added to characteristic
|
||||||
|
while still being beneficial to merge it into another one."""
|
||||||
|
count = len(self.items)
|
||||||
|
return max(0, (self.overhead - 1) // count - self.width)
|
||||||
|
room = property(get_room)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def gain(self):
|
||||||
|
"""Maximum possible byte gain from merging this into another
|
||||||
|
characteristic."""
|
||||||
|
count = len(self.items)
|
||||||
|
return max(0, self.overhead - count * (self.width + 1))
|
||||||
|
|
||||||
|
def sort_key(self):
|
||||||
|
return self.width, self.chars
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.items)
|
||||||
|
|
||||||
|
def can_encode(self, chars):
|
||||||
|
return not (chars & ~self.chars)
|
||||||
|
|
||||||
|
def __sub__(self, other):
|
||||||
|
return self._popcount(self.chars & ~other.chars)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _popcount(n):
|
||||||
|
# Apparently this is the fastest native way to do it...
|
||||||
|
# https://stackoverflow.com/a/9831671
|
||||||
|
return bin(n).count('1')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _characteristic_overhead(chars):
|
||||||
|
"""Returns overhead in bytes of encoding this characteristic
|
||||||
|
as a VarData."""
|
||||||
|
c = 6
|
||||||
|
while chars:
|
||||||
|
if chars & 3:
|
||||||
|
c += 2
|
||||||
|
chars >>= 2
|
||||||
|
return c
|
||||||
|
|
||||||
|
|
||||||
|
def _find_yourself_best_new_encoding(self, done_by_width):
|
||||||
|
self.best_new_encoding = None
|
||||||
|
for new_width in range(self.width+1, self.width+self.room+1):
|
||||||
|
for new_encoding in done_by_width[new_width]:
|
||||||
|
if new_encoding.can_encode(self.chars):
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
new_encoding = None
|
||||||
|
self.best_new_encoding = new_encoding
|
||||||
|
|
||||||
|
|
||||||
|
class _EncodingDict(dict):
|
||||||
|
|
||||||
|
def __missing__(self, chars):
|
||||||
|
r = self[chars] = _Encoding(chars)
|
||||||
|
return r
|
||||||
|
|
||||||
|
def add_row(self, row):
|
||||||
|
chars = self._row_characteristics(row)
|
||||||
|
self[chars].append(row)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _row_characteristics(row):
|
||||||
|
"""Returns encoding characteristics for a row."""
|
||||||
|
chars = 0
|
||||||
|
i = 1
|
||||||
|
for v in row:
|
||||||
|
if v:
|
||||||
|
chars += i
|
||||||
|
if not (-128 <= v <= 127):
|
||||||
|
chars += i * 2
|
||||||
|
i <<= 2
|
||||||
|
return chars
|
||||||
|
|
||||||
|
|
||||||
def VarStore_optimize(self):
|
def VarStore_optimize(self):
|
||||||
"""Optimize storage. Returns mapping from old VarIdxes to new ones."""
|
"""Optimize storage. Returns mapping from old VarIdxes to new ones."""
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# Check that no two VarRegions are the same; if they are, fold them.
|
# Check that no two VarRegions are the same; if they are, fold them.
|
||||||
# Also that same VarData does not reference same VarRegion more than once...
|
|
||||||
|
|
||||||
mapping = {}
|
n = len(self.VarRegionList.Region) # Number of columns
|
||||||
|
zeroes = array('h', [0]*n)
|
||||||
|
|
||||||
|
front_mapping = {} # Map from old VarIdxes to full row tuples
|
||||||
|
|
||||||
|
encodings = _EncodingDict()
|
||||||
|
|
||||||
|
# Collect all items into a set of full rows (with lots of zeroes.)
|
||||||
for major,data in enumerate(self.VarData):
|
for major,data in enumerate(self.VarData):
|
||||||
for minor,row in enumerate(data.Item):
|
regionIndices = data.VarRegionIndex
|
||||||
mapping[(major<<16)+minor] = (major<<16)+minor
|
|
||||||
|
|
||||||
|
for minor,item in enumerate(data.Item):
|
||||||
|
|
||||||
|
row = array('h', zeroes)
|
||||||
|
for regionIdx,v in zip(regionIndices, item):
|
||||||
|
row[regionIdx] += v
|
||||||
|
row = tuple(row)
|
||||||
|
|
||||||
|
encodings.add_row(row)
|
||||||
|
front_mapping[(major<<16)+minor] = row
|
||||||
|
|
||||||
|
# Separate encodings that have no gain (are decided) and those having
|
||||||
|
# possible gain (possibly to be merged into others.)
|
||||||
|
encodings = sorted(encodings.values(), key=_Encoding.__len__, reverse=True)
|
||||||
|
done_by_width = defaultdict(list)
|
||||||
|
todo = []
|
||||||
|
for encoding in encodings:
|
||||||
|
if not encoding.gain:
|
||||||
|
done_by_width[encoding.width].append(encoding)
|
||||||
|
else:
|
||||||
|
todo.append(encoding)
|
||||||
|
|
||||||
|
# For each encoding that is possibly to be merged, find the best match
|
||||||
|
# in the decided encodings, and record that.
|
||||||
|
todo.sort(key=_Encoding.get_room)
|
||||||
|
for encoding in todo:
|
||||||
|
encoding._find_yourself_best_new_encoding(done_by_width)
|
||||||
|
|
||||||
|
# Walk through todo encodings, for each, see if merging it with
|
||||||
|
# another todo encoding gains more than each of them merging with
|
||||||
|
# their best decided encoding. If yes, merge them and add resulting
|
||||||
|
# encoding back to todo queue. If not, move the encoding to decided
|
||||||
|
# list. Repeat till done.
|
||||||
|
while todo:
|
||||||
|
encoding = todo.pop()
|
||||||
|
best_idx = None
|
||||||
|
best_gain = 0
|
||||||
|
for i,other_encoding in enumerate(todo):
|
||||||
|
combined_chars = other_encoding.chars | encoding.chars
|
||||||
|
combined_width = _Encoding._popcount(combined_chars)
|
||||||
|
combined_overhead = _Encoding._characteristic_overhead(combined_chars)
|
||||||
|
combined_gain = (
|
||||||
|
+ encoding.overhead
|
||||||
|
+ other_encoding.overhead
|
||||||
|
- combined_overhead
|
||||||
|
- (combined_width - encoding.width) * len(encoding)
|
||||||
|
- (combined_width - other_encoding.width) * len(other_encoding)
|
||||||
|
)
|
||||||
|
this_gain = 0 if encoding.best_new_encoding is None else (
|
||||||
|
+ encoding.overhead
|
||||||
|
- (encoding.best_new_encoding.width - encoding.width) * len(encoding)
|
||||||
|
)
|
||||||
|
other_gain = 0 if other_encoding.best_new_encoding is None else (
|
||||||
|
+ other_encoding.overhead
|
||||||
|
- (other_encoding.best_new_encoding.width - other_encoding.width) * len(other_encoding)
|
||||||
|
)
|
||||||
|
separate_gain = this_gain + other_gain
|
||||||
|
|
||||||
|
if combined_gain > separate_gain:
|
||||||
|
best_idx = i
|
||||||
|
best_gain = combined_gain - separate_gain
|
||||||
|
|
||||||
|
if best_idx is None:
|
||||||
|
# Encoding is decided as is
|
||||||
|
done_by_width[encoding.width].append(encoding)
|
||||||
|
else:
|
||||||
|
other_encoding = todo[best_idx]
|
||||||
|
combined_chars = other_encoding.chars | encoding.chars
|
||||||
|
combined_encoding = _Encoding(combined_chars)
|
||||||
|
combined_encoding.extend(encoding.items)
|
||||||
|
combined_encoding.extend(other_encoding.items)
|
||||||
|
combined_encoding._find_yourself_best_new_encoding(done_by_width)
|
||||||
|
del todo[best_idx]
|
||||||
|
todo.append(combined_encoding)
|
||||||
|
|
||||||
|
# Assemble final store.
|
||||||
|
back_mapping = {} # Mapping from full rows to new VarIdxes
|
||||||
|
encodings = sum(done_by_width.values(), [])
|
||||||
|
encodings.sort(key=_Encoding.sort_key)
|
||||||
|
self.VarData = []
|
||||||
|
for major,encoding in enumerate(encodings):
|
||||||
|
data = ot.VarData()
|
||||||
|
self.VarData.append(data)
|
||||||
|
data.VarRegionIndex = range(n)
|
||||||
|
data.VarRegionCount = len(data.VarRegionIndex)
|
||||||
|
data.Item = sorted(encoding.items)
|
||||||
|
for minor,item in enumerate(data.Item):
|
||||||
|
back_mapping[item] = (major<<16)+minor
|
||||||
|
|
||||||
|
# Compile final mapping.
|
||||||
|
varidx_map = {}
|
||||||
|
for k,v in front_mapping.items():
|
||||||
|
varidx_map[k] = back_mapping[v]
|
||||||
|
|
||||||
|
# Remove unused regions.
|
||||||
self.prune_regions()
|
self.prune_regions()
|
||||||
|
|
||||||
# Recalculate things and go home.
|
# Recalculate things and go home.
|
||||||
@ -252,7 +445,7 @@ def VarStore_optimize(self):
|
|||||||
data.ItemCount = len(data.Item)
|
data.ItemCount = len(data.Item)
|
||||||
VarData_CalculateNumShorts(data)
|
VarData_CalculateNumShorts(data)
|
||||||
|
|
||||||
return mapping
|
return varidx_map
|
||||||
|
|
||||||
ot.VarStore.optimize = VarStore_optimize
|
ot.VarStore.optimize = VarStore_optimize
|
||||||
|
|
||||||
@ -283,7 +476,13 @@ def main(args=None):
|
|||||||
#size = len(writer.getAllData())
|
#size = len(writer.getAllData())
|
||||||
#print("Before: %7d bytes" % size)
|
#print("Before: %7d bytes" % size)
|
||||||
|
|
||||||
store.optimize()
|
varidx_map = store.optimize()
|
||||||
|
|
||||||
|
gdef.table.remap_device_varidxes(varidx_map)
|
||||||
|
if 'GSUB' in font:
|
||||||
|
font['GSUB'].table.remap_device_varidxes(varidx_map)
|
||||||
|
if 'GPOS' in font:
|
||||||
|
font['GPOS'].table.remap_device_varidxes(varidx_map)
|
||||||
|
|
||||||
writer = OTTableWriter()
|
writer = OTTableWriter()
|
||||||
store.compile(writer, font)
|
store.compile(writer, font)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user