# fonttools/Lib/fontTools/cffLib/specializer.py

# -*- coding: utf-8 -*-

"""T2CharString operator specializer and generalizer."""

from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *

def programToCommands(program):
	"""Split a T2CharString program list into a list of commands.

	Each command is a two-tuple (operator, operands).  Operands are the
	non-string tokens that precede the operator in the program.  The operator
	is None when the operands are bare data with no operator to emit:

	  - operands that were pending when a hintmask/cntrmask was reached,
	  - the single token that follows a hintmask/cntrmask (its mask argument),
	  - operands left over at the end of the program.

	A hintmask/cntrmask operator itself is always emitted with an empty
	operand list."""

	commands = []
	pending = []
	tokens = iter(program)
	for token in tokens:
		if isinstance(token, basestring):
			if token in ('hintmask', 'cntrmask'):
				# Flush whatever is pending as data, emit the bare operator,
				# then consume the next token as the mask argument.
				if pending:
					commands.append((None, pending))
				commands.append((token, []))
				commands.append((None, [next(tokens)]))
			else:
				commands.append((token, pending))
			# Start a fresh operand list; the old one now belongs to a command.
			pending = []
		else:
			pending.append(token)
	if pending:
		commands.append((None, pending))
	return commands

def commandsToProgram(commands):
	"""Flatten a commands list, as produced by programToCommands(), back
	into a T2CharString program list.

	For each (operator, operands) pair the operands are emitted first,
	followed by the operator unless it is falsy (e.g. None for bare data)."""
	tokens = []
	for name, operands in commands:
		tokens.extend(operands)
		if not name:
			continue
		tokens.append(name)
	return tokens


def _everyN(el, n):
	"""Group the list el into groups of size n"""
	if len(el) % n != 0: raise ValueError(el)
	for i in range(0, len(el), n):
		yield el[i:i+n]


class _GeneralizerDecombinerCommandsMap(object):

	@staticmethod
	def rmoveto(args):
		if len(args) != 2: raise ValueError(args)
		yield ('rmoveto', args)
	@staticmethod
	def hmoveto(args):
		if len(args) != 1: raise ValueError(args)
		yield ('rmoveto', [args[0], 0])
	@staticmethod
	def vmoveto(args):
		if len(args) != 1: raise ValueError(args)
		yield ('rmoveto', [0, args[0]])

	@staticmethod
	def rlineto(args):
		for args in _everyN(args, 2):
			yield ('rlineto', args)
	@staticmethod
	def hlineto(args):
		it = iter(args)
		while True:
			yield ('rlineto', [next(it), 0])
			yield ('rlineto', [0, next(it)])
	@staticmethod
	def vlineto(args):
		it = iter(args)
		while True:
			yield ('rlineto', [0, next(it)])
			yield ('rlineto', [next(it), 0])

	@staticmethod
	def rrcurveto(args):
		for args in _everyN(args, 6):
			yield ('rrcurveto', args)
	@staticmethod
	def hhcurveto(args):
		if len(args) < 4 or len(args) % 4 > 1: raise ValueError(args)
		if len(args) % 2 == 1:
			yield ('rrcurveto', [args[1], args[0], args[2], args[3], args[4], 0])
			args = args[5:]
		for args in _everyN(args, 4):
			yield ('rrcurveto', [args[0], 0, args[1], args[2], args[3], 0])
	@staticmethod
	def vvcurveto(args):
		if len(args) < 4 or len(args) % 4 > 1: raise ValueError(args)
		if len(args) % 2 == 1:
			yield ('rrcurveto', [args[0], args[1], args[2], args[3], 0, args[4]])
			args = args[5:]
		for args in _everyN(args, 4):
			yield ('rrcurveto', [0, args[0], args[1], args[2], 0, args[3]])
	@staticmethod
	def hvcurveto(args):
		if len(args) < 4 or len(args) % 8 not in {0,1,4,5}: raise ValueError(args)
		last_args = None
		if len(args) % 2 == 1:
			lastStraight = len(args) % 8 == 5
			args, last_args = args[:-5], args[-5:]
		it = _everyN(args, 4)
		try:
			while True:
				args = next(it)
				yield ('rrcurveto', [args[0], 0, args[1], args[2], 0, args[3]])
				args = next(it)
				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], 0])
		except StopIteration:
			pass
		if last_args:
			args = last_args
			if lastStraight:
				yield ('rrcurveto', [args[0], 0, args[1], args[2], args[4], args[3]])
			else:
				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], args[4]])
	@staticmethod
	def vhcurveto(args):
		if len(args) < 4 or len(args) % 8 not in {0,1,4,5}: raise ValueError(args)
		last_args = None
		if len(args) % 2 == 1:
			lastStraight = len(args) % 8 == 5
			args, last_args = args[:-5], args[-5:]
		it = _everyN(args, 4)
		try:
			while True:
				args = next(it)
				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], 0])
				args = next(it)
				yield ('rrcurveto', [args[0], 0, args[1], args[2], 0, args[3]])
		except StopIteration:
			pass
		if last_args:
			args = last_args
			if lastStraight:
				yield ('rrcurveto', [0, args[0], args[1], args[2], args[3], args[4]])
			else:
				yield ('rrcurveto', [args[0], 0, args[1], args[2], args[4], args[3]])

	@staticmethod
	def rcurveline(args):
		if len(args) < 8 or len(args) % 6 != 2: raise ValueError(args)
		args, last_args = args[:-2], args[-2:]
		for args in _everyN(args, 6):
			yield ('rrcurveto', args)
		yield ('rlineto', last_args)
	@staticmethod
	def rlinecurve(args):
		if len(args) < 8 or len(args) % 2 != 0: raise ValueError(args)
		args, last_args = args[:-6], args[-6:]
		for args in _everyN(args, 2):
			yield ('rlineto', args)
		yield ('rrcurveto', last_args)


def generalizeCommands(commands, ignoreErrors=True):
	"""Expand every specialized or combined path operator in commands into
	generic single-segment rmoveto/rlineto/rrcurveto commands.

	Operators that _GeneralizerDecombinerCommandsMap has no handler for
	(including None) are passed through unchanged.  When a handler raises
	ValueError: if ignoreErrors is true, the arguments and the operator are
	both stored as bare data commands; otherwise the error is re-raised.

	Returns a new list; the input list is not modified."""
	decombiners = _GeneralizerDecombinerCommandsMap
	generalized = []
	for op, args in commands:
		expand = getattr(decombiners, op or '', None)
		if not expand:
			generalized.append((op, args))
			continue
		try:
			for segment in expand(args):
				generalized.append(segment)
		except ValueError:
			if not ignoreErrors:
				raise
			# Store op as data, such that consumers of commands do not have
			# to deal with incorrect number of arguments.
			generalized.append((None, args))
			generalized.append((None, [op]))
	return generalized

def generalizeProgram(program, **kwargs):
	"""Generalize a T2CharString program list.

	Converts the program to commands, runs generalizeCommands() on them
	(forwarding kwargs), and converts the result back to a program list."""
	commands = programToCommands(program)
	generalized = generalizeCommands(commands, **kwargs)
	return commandsToProgram(generalized)


def _categorizeVector(v):
	"""
	Takes X,Y vector v and returns one of r, h, v, or 0 depending on which
	of X and/or Y are zero, plus tuple of nonzero ones.  If both are zero,
	it returns a single zero still.

	>>> _categorizeVector((0,0))
	('0', (0,))
	>>> _categorizeVector((1,0))
	('h', (1,))
	>>> _categorizeVector((0,2))
	('v', (2,))
	>>> _categorizeVector((1,2))
	('r', (1, 2))
	"""
	if v[0] == 0:
		if v[1] == 0:
			return '0', type(v)((0,))
		else:
			return 'v', v[1:]
	else:
		if v[1] == 0:
			return 'h', v[:1]
		else:
			return 'r', v
	return "rvh0"[(v[1]==0) * 2 + (v[0]==0)]

def _mergeCategories(a, b, dontCare):
	if a == dontCare: return b
	if b == dontCare: return a
	if a == b: return a
	return None

def _applyJoint(a, b, j):
	if j == '.' or a == 'r' or b == 'r': return a, b
	if a != '0':
		c = 'hv'[(a == 'v') ^ (j == '+')]
		assert b == '0' or b == c
		b = c
	else:
		a = 'hv'[(b == 'v') ^ (j == '+')]
	return a, b

def specializeCommands(commands,
		       ignoreErrors=False,
		       generalizeFirst=True,
		       preserveTopology=False,
		       maxstack=48):
	"""Takes a commands list as returned by programToCommands() and returns
	a new commands list that uses specialized and combined path operators.

	Arguments:
	  commands: list of (operator, args) tuples.  The operator may be None
	    for bare data; such commands are passed through untouched.  The
	    input list is copied (or regenerated) rather than modified.
	  ignoreErrors: forwarded to generalizeCommands() when generalizeFirst
	    is true.
	  generalizeFirst: if true, run generalizeCommands() on the input first.
	    Turn off only if the input already has one segment per command and
	    uses only the generic rmoveto/rlineto/rrcurveto path operators.
	  preserveTopology: if true, skip the pass that merges or deletes
	    redundant segments.
	  maxstack: upper bound on the number of arguments of a combined
	    operator; three less than this value is actually used.
	"""

	maxstack -= 3 # AFDKO code uses effective 45 maxstack while the spec says 48.

	# We perform several rounds of optimizations.  They are carefully ordered and are:
	#
	# 0. Generalize commands.
	#    This ensures that they are in our expected simple form, with each line/curve only
	#    having arguments for one segment, and using the generic form (rlineto/rrcurveto).
	#    If caller is sure the input is in this form, they can turn off generalization to
	#    save time.
	#
	# 1. Combine successive rmoveto operations.
	#
	# 2. Specialize rmoveto/rlineto/rrcurveto operators into horizontal/vertical variants.
	#    We specialize into some, made-up, variants as well, which simplifies following
	#    passes.
	#
	# 3. Merge or delete redundant operations, if changing topology is allowed.
	#    OpenType spec declares point numbers in CFF undefined.  As such, we happily
	#    change topology.  If client relies on point numbers (in GPOS anchors, or for
	#    hinting purposes(what?)) they can turn this off.
	#
	# 4. Peephole optimization to revert back some of the h/v variants back into their
	#    original "relative" operator (rlineto/rrcurveto) if that saves a byte.
	#
	# 5. Combine adjacent operators when possible, minding not to go over max stack size.
	#
	# 6. Resolve any remaining made-up operators into real operators.
	#
	# I have convinced myself that this produces optimal bytecode (except for, possibly
	# one byte each time maxstack size prohibits combining.)  YMMV, but you'd be wrong. :-)
	# A dynamic-programming approach can do the same but would be significantly slower.


	# 0. Generalize commands.
	if generalizeFirst:
		commands = generalizeCommands(commands, ignoreErrors=ignoreErrors)
	else:
		commands = commands[:] # Make copy since we modify in-place later.

	# 1. Combine successive rmoveto operations.
	for i in range(len(commands)-1, 0, -1):
		if 'rmoveto' == commands[i][0] == commands[i-1][0]:
			v1, v2 = commands[i-1][1], commands[i][1]
			commands[i-1] = ('rmoveto', [v1[0]+v2[0], v1[1]+v2[1]])
			del commands[i]

	# 2. Specialize rmoveto/rlineto/rrcurveto operators into horizontal/vertical variants.
	#
	# We, in fact, specialize into more, made-up, variants that special-case when both
	# X and Y components are zero.  This simplifies the following optimization passes.
	# This case is rare, but OCD does not let me skip it.
	#
	# After this round, we will have four variants that use the following mnemonics:
	#
	#  - 'r' for relative,   ie. non-zero X and non-zero Y,
	#  - 'h' for horizontal, ie. non-zero X and zero Y,
	#  - 'v' for vertical,   ie. zero X and non-zero Y,
	#  - '0' for zeros,      ie. zero X and zero Y.
	#
	# The '0' pseudo-operators are not part of the spec, but help simplify the following
	# optimization rounds.  We resolve them at the end.  So, after this, we will have four
	# moveto and four lineto variants:
	#
	#  - 0moveto, 0lineto
	#  - hmoveto, hlineto
	#  - vmoveto, vlineto
	#  - rmoveto, rlineto
	#
	# and sixteen curveto variants.  For example, a '0hcurveto' operator means a curve
	# dx0,dy0,dx1,dy1,dx2,dy2 where dx0, dy0, and dy2 are zero but not dx2.
	# An 'rvcurveto' means dx2 is zero but not dx0,dy0,dy2.
	#
	# There are nine different variants of curves without the '0'.  Those nine map exactly
	# to the existing curve variants in the spec: rrcurveto, and the four variants hhcurveto,
	# vvcurveto, hvcurveto, and vhcurveto each cover two cases, one with an odd number of
	# arguments and one without.  Eg. an hhcurveto with an extra argument (odd number of
	# arguments) is in fact an rhcurveto.  The operators in the spec are designed such that
	# all four of rhcurveto, rvcurveto, hrcurveto, and vrcurveto are encodable.
	#
	# Of the curve types with '0', the 00curveto is equivalent to a lineto variant.  The rest
	# of the curve types with a 0 need to be encoded as a h or v variant.  Ie. a '0' can be
	# thought of a "don't care" and can be used as either an 'h' or a 'v'.  As such, we always
	# encode a number 0 as argument when we use a '0' variant.  Later on, we can just substitute
	# the '0' with either 'h' or 'v' and it works.
	#
	# When we get to curve splines however, things become more complicated. When we have a
	# curve spline that starts and ends horizontally, there are two different cases, one
	# where all curves start and end horizontally, another when curves alternate between
	# horizontal-vertical and vertical-horizontal.  To distinguish these cases, we use an
	# extra character in the pseudo-operator names that signifies the spline type:
	#
	#  - '+' means a spline where curves alternate between horizontal and vertical orientations,
	#        and is called a "pizza-slice" spline.
	#  - '=' means a spline where all curves start and end in the same orientation (h or v),
	#        and is called a "french-fries" spline.
	#  - '.' means "don't care", ie. the spline can be encoded / treated as either a pizza-slice
	#        or french-fries.  This happens where 0s are involved in curves, because those can
	#        be encoded as both h and v.
	#
	# So, from here on, we use rrcurveto as is, but for other variants of curves we use three
	# mnemonic signifiers.  For example, 'h+vcurveto', or 'r.0curveto'.  Rules for combining
	# these will be defined later.
	#
	# There's one more complexity with splines.  If one side of the spline is not horizontal or
	# vertical (or zero), ie. if it's 'r', then it limits which spline types we can encode.
	# Only spline type '=' can start with an 'r' (hhcurveto and vvcurveto operators), and
	# only spline type '+' can end in an 'r' (hvcurveto and vhcurveto operators).
	#
	for i in range(len(commands)):
		op,args = commands[i]

		if op in {'rmoveto', 'rlineto'}:
			c, args = _categorizeVector(args)
			commands[i] = c+op[1:], args
			continue

		if op == 'rrcurveto':
			c1, args1 = _categorizeVector(args[:2])
			c2, args2 = _categorizeVector(args[-2:])
			if c1 == c2 == 'r':
				continue

			join = '.'
			if c1 == 'r':
				join = '='
			elif c2 == 'r':
				join = '+'
			elif c1 != '0' and c2 != '0':
				# Both sides are h and/or v
				join = '=' if c1 == c2 else '+'

			commands[i] = c1+join+c2+'curveto', args1+args[2:4]+args2
			continue

	# 3. Merge or delete redundant operations, if changing topology is allowed.
	if not preserveTopology:
		for i in range(len(commands)-1, -1, -1):
			op, args = commands[i]

			# A 0.0curveto is demoted to a (specialized) lineto.  Pass 2 always
			# names a curve with both ends zero '0.0curveto' (join stays '.').
			if op == '0.0curveto':
				assert len(args) == 4
				c, args = _categorizeVector(args[1:3])
				op = c+'lineto'
				commands[i] = op, args
				# and then...

			# A 0lineto can be deleted.
			if op == '0lineto':
				del commands[i]
				continue

			# Merge adjacent hlineto's and vlineto's.
			if i and op in {'hlineto', 'vlineto'} and op == commands[i-1][0]:
				_, other_args = commands[i-1]
				assert len(args) == 1 and len(other_args) == 1
				commands[i-1] = (op, [other_args[0]+args[0]])
				del commands[i]
				continue

	# 4. Peephole optimization to revert back some of the h/v variants back into their
	#    original "relative" operator (rlineto/rrcurveto) if that saves a byte.
	for i in range(1, len(commands)-1):
		op,args = commands[i]
		if op is None:
			# Bare data (eg. hintmask argument); nothing to revert.
			continue
		prv,nxt = commands[i-1][0], commands[i+1][0]

		if op in {'0lineto', 'hlineto', 'vlineto'} and prv == nxt == 'rlineto':
			assert len(args) == 1
			args = [0, args[0]] if op[0] == 'v' else [args[0], 0]
			commands[i] = ('rlineto', args)
			continue

		if op[3:] == 'curveto' and len(args) == 5 and prv == nxt == 'rrcurveto':
			# Five arguments means exactly one end is 'r'; re-insert the
			# zero that pass 2 dropped from the other end.
			assert (op[0] == 'r') ^ (op[2] == 'r')
			args = args[:]
			if op[0] == 'v':
				pos = 0
			elif op[0] != 'r':
				pos = 1
			elif op[2] == 'v':
				# op[2] is the end orientation (op[1] is the join character).
				pos = 4
			else:
				pos = 5
			args.insert(pos, 0)
			commands[i] = ('rrcurveto', args)
			continue

	# 5. Combine adjacent operators when possible, minding not to go over max stack size.
	for i in range(len(commands)-1, 0, -1):
		op1,args1 = commands[i-1]
		op2,args2 = commands[i]
		if op1 is None or op2 is None:
			# Bare data never combines with anything.
			continue
		new_op = None

		# Merge logic...
		if {op1, op2} <= {'rlineto', 'rrcurveto'}:
			if op1 == op2:
				new_op = op1
			else:
				if op2 == 'rrcurveto' and len(args2) == 6:
					new_op = 'rlinecurve'
				elif len(args2) == 2:
					new_op = 'rcurveline'
		elif {op1, op2} == {'vlineto', 'hlineto'}:
			new_op = op1
		elif 'curveto' == op1[3:] == op2[3:]:
			# Two curves can merge if their spline types are compatible, ie.
			# at least one is a wildcard spline ('.') or otherwise the two are
			# both pizza ('+') or both fries ('='), and
			#
			# The joining orientations are NOT 'r', and are compatible, ie.
			# at least one is a '0', or otherwise they are both 'h' or both
			# 'v'.
			#
			# The _mergeCategories() function does such compatibility matching.

			d0, j1, d1 = op1[:3]
			d2, j2, d3 = op2[:3]

			j = _mergeCategories(j1, j2, '.')
			d = _mergeCategories(d1, d2, '0')
			if j and d and d != 'r':

				if j == '.' and d != '0':
					# Need to resolve join, if middle is oriented but
					# join type is free...  Happens for example for:
					#
					# 0 0 1 2 3 0 rrcurveto 4 0 5 6 0 0 rrcurveto
					#
					# which can be combined both into a h=hcurveto, or
					# a v+vcurveto.  But would be wrong to combine into
					# 0.0curveto.  That would lose the orientation of the
					# middle segments!!
					#
					# Ok, this is one place that now I'm convinced my model
					# is not powerful enough... and maybe dynamic-programming
					# is needed after all.  I'll keep thinking about how
					# to fix this without too much work...
					j = '=' # XXX arbitrary

				# Propagate...
				d0,d = _applyJoint(d0, d, j) # WRONG?
				d,d3 = _applyJoint(d, d3, j) # WRONG?
				d0,d = _applyJoint(d0, d, j) # WRONG?

				new_op = d0+j+d3+'curveto'

		if new_op and len(args1) + len(args2) <= maxstack:
			commands[i-1] = (new_op, args1+args2)
			del commands[i]

	# 6. Resolve any remaining made-up operators into real operators.
	for i in range(len(commands)):
		op,args = commands[i]
		if op is None:
			# Bare data has no operator to resolve.
			continue

		if op in {'0moveto', '0lineto'}:
			commands[i] = 'h'+op[1:], args
			continue

		if op[3:] == 'curveto':
			if op[0] == 'r' or op[2] == 'r':
				assert len(args) % 2 == 1
			if op[1] == '+':
				if (op[0] == 'v' or
				    (op[2] == 'h' and len(args) % 8 >= 4) or
				    (op[2] == 'v' and len(args) % 8 <  4)):
					op = 'vhcurveto'
				else:
					op = 'hvcurveto'

				if len(args) % 2 == 1 and ((op[0] == 'h') ^ (len(args) % 8 == 1)):
					# Swap last two args order
					args = args[:-2]+args[-1:]+args[-2:-1]
			else:
				op = 'vvcurveto' if op[0] == 'v' or op[2] == 'v' else 'hhcurveto'
				if len(args) % 2 == 1 and op[0] == 'h':
					# Swap first two args order
					args = args[1:2]+args[:1]+args[2:]
			commands[i] = op, args
			continue

	return commands

def specializeProgram(program, **kwargs):
	"""Specialize a T2CharString program list.

	Converts the program to commands, runs specializeCommands() on them
	(forwarding kwargs), and converts the result back to a program list."""
	commands = programToCommands(program)
	specialized = specializeCommands(commands, **kwargs)
	return commandsToProgram(specialized)


if __name__ == '__main__':
	# With no arguments, run this module's doctests and exit with the
	# failure count.  Otherwise treat the arguments as a charstring program,
	# round-trip it through the command form, and print the generalized and
	# specialized versions.
	import sys
	if len(sys.argv) == 1:
		import doctest
		sys.exit(doctest.testmod().failed)
	program = []
	for token in sys.argv[1:]:
		# Numbers become int if possible, else float; anything else stays a
		# string (an operator name).
		for convert in (int, float):
			try:
				token = convert(token)
			except ValueError:
				continue
			break
		program.append(token)
	print("Program:")
	print(program)
	commands = programToCommands(program)
	print("Commands:")
	print(commands)
	program2 = commandsToProgram(commands)
	print("Program from commands:")
	print(program2)
	assert program == program2
	print("Generalized program:")
	print(generalizeProgram(program))
	print("Specialized program:")
	print(specializeProgram(program))