Merge pull request #913 from anthrotype/vtt-tsi-length

[TSI*] Fix computation of the textLength of VTT private tables entries
2017-04-11 10:20:36 +01:00 · 2017-04-11 10:20:36 +01:00 · 7de51cff57
commit 7de51cff57
parent e5e795ac9f f1e9bf30e4
9 changed files with 393 additions and 31 deletions
--- a/Lib/fontTools/misc/loggingTools.py
+++ b/Lib/fontTools/misc/loggingTools.py
@ -463,6 +463,13 @@ class CapturingLogHandler(logging.Handler):
 	def createLock(self):
 		self.lock = None

+	def assertRegex(self, regexp):
+		"""Return True if `regexp` is found in any captured record's message.
+
+		Raises AssertionError when no record matches.
+		"""
+		import re
+		pattern = re.compile(regexp)
+		for r in self.records:
+			# getMessage() applies any %-style args to `msg` and stringifies
+			# it, so lazily formatted and non-string messages are matched too.
+			if pattern.search(r.getMessage()):
+				return True
+		# Raised explicitly: a bare `assert` statement is removed under -O.
+		raise AssertionError(
+			"Pattern '%s' not found in logger records" % regexp)

 def deprecateArgument(name, msg, category=UserWarning):
 	""" Raise a warning about deprecated function argument 'name'. """
--- a/Lib/fontTools/misc/py23.py
+++ b/Lib/fontTools/misc/py23.py
@ -477,6 +477,25 @@ if not hasattr(logging, 'lastResort'):
 	logging.setLoggerClass(_Logger)


+try:
+	from types import SimpleNamespace
+except ImportError:
+	class SimpleNamespace(object):
+		"""
+		A backport of Python 3.3's ``types.SimpleNamespace``.
+
+		Keyword arguments become instance attributes, ``repr()`` lists them
+		sorted by name, and two namespaces compare equal when their
+		attributes are equal.
+		"""
+		def __init__(self, **kwargs):
+			self.__dict__.update(kwargs)
+
+		def __repr__(self):
+			keys = sorted(self.__dict__)
+			items = ("{0}={1!r}".format(k, self.__dict__[k]) for k in keys)
+			return "{0}({1})".format(type(self).__name__, ", ".join(items))
+
+		def __eq__(self, other):
+			# Defer to the other operand when it is not a namespace, rather
+			# than failing on objects that have no ``__dict__``.
+			if not isinstance(other, SimpleNamespace):
+				return NotImplemented
+			return self.__dict__ == other.__dict__
+
+		def __ne__(self, other):
+			# On Python 2 ``!=`` is not derived from ``__eq__``; without this
+			# two equal namespaces would still compare as different.
+			result = self.__eq__(other)
+			return result if result is NotImplemented else not result
+
+
 if __name__ == "__main__":
 	import doctest, sys
 	sys.exit(doctest.testmod().failed)
--- a/Lib/fontTools/ttLib/tables/T_S_I__0.py
+++ b/Lib/fontTools/ttLib/tables/T_S_I__0.py
@ -1,9 +1,16 @@
+""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
+tool to store its hinting source data.
+
+TSI0 is the index table containing the lengths and offsets for the glyph
+programs and 'extra' programs ('fpgm', 'prep', and 'cvt') that are contained
+in the TSI1 table.
+"""
 from __future__ import print_function, division, absolute_import
 from fontTools.misc.py23 import *
 from . import DefaultTable
 import struct

-tsi0Format = '>HHl'
+tsi0Format = '>HHL'

 def fixlongs(glyphID, textLength, textOffset):
 	return int(glyphID), int(textLength), textOffset
@ -22,7 +29,7 @@ class table_T_S_I__0(DefaultTable.DefaultTable):
 			indices.append((glyphID, textLength, textOffset))
 			data = data[size:]
 		assert len(data) == 0
-		assert indices[-5] == (0XFFFE, 0, -1409540300), "bad magic number"  # 0xABFC1F34
+		assert indices[-5] == (0XFFFE, 0, 0xABFC1F34), "bad magic number"
 		self.indices = indices[:-5]
 		self.extra_indices = indices[-4:]

@ -30,11 +37,11 @@ class table_T_S_I__0(DefaultTable.DefaultTable):
 		if not hasattr(self, "indices"):
 			# We have no corresponding table (TSI1 or TSI3); let's return
 			# no data, which effectively means "ignore us".
-			return ""
+			return b""
 		data = b""
 		for index, textLength, textOffset in self.indices:
 			data = data + struct.pack(tsi0Format, index, textLength, textOffset)
-		data = data + struct.pack(tsi0Format, 0XFFFE, 0, -1409540300)  # 0xABFC1F34
+		data = data + struct.pack(tsi0Format, 0XFFFE, 0, 0xABFC1F34)
 		for index, textLength, textOffset in self.extra_indices:
 			data = data + struct.pack(tsi0Format, index, textLength, textOffset)
 		return data
--- a/Lib/fontTools/ttLib/tables/T_S_I__1.py
+++ b/Lib/fontTools/ttLib/tables/T_S_I__1.py
@ -1,6 +1,14 @@
+""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
+tool to store its hinting source data.
+
+TSI1 contains the text of the glyph programs in the form of low-level assembly
+code, as well as the 'extra' programs 'fpgm', 'ppgm' (i.e. 'prep'), and 'cvt'.
+"""
 from __future__ import print_function, division, absolute_import
 from fontTools.misc.py23 import *
 from . import DefaultTable
+import logging
+

 class table_T_S_I__1(DefaultTable.DefaultTable):

@ -8,33 +16,69 @@ class table_T_S_I__1(DefaultTable.DefaultTable):

 	indextable = "TSI0"

-	def decompile(self, data, ttFont):
-		indextable = ttFont[self.indextable]
-		self.glyphPrograms = {}
-		for i in range(len(indextable.indices)):
-			glyphID, textLength, textOffset = indextable.indices[i]
-			if textLength == 0x8000:
-				# Ugh. Hi Beat!
-				textLength = indextable.indices[i+1][1]
-			if textLength > 0x8000:
-				pass  # XXX Hmmm.
-			text = data[textOffset:textOffset+textLength]
-			assert len(text) == textLength
-			if text:
-				self.glyphPrograms[ttFont.getGlyphName(glyphID)] = text
+	def __init__(self, tag=None):
+		"""Initialise the table and attach a per-module logger as ``self.log``."""
+		super(table_T_S_I__1, self).__init__(tag)
+		# Resolved through self.__class__, so a subclass defined in another
+		# module gets a logger named after its own module.
+		self.log = logging.getLogger(self.__class__.__module__)

-		self.extraPrograms = {}
-		for i in range(len(indextable.extra_indices)):
-			extraCode, textLength, textOffset = indextable.extra_indices[i]
-			if textLength == 0x8000:
-				if self.extras[extraCode] == "fpgm":	# this is the last one
-					textLength = len(data) - textOffset
+	def decompile(self, data, ttFont):
+		"""Split the raw table `data` into named program texts.
+
+		Lengths and offsets are read from the index table
+		``ttFont[self.indextable]``. Sets ``self.glyphPrograms`` (keyed by
+		glyph name) and ``self.extraPrograms`` (keyed by ``self.extras[code]``);
+		entries whose text is empty are left out.
+
+		A record whose offset lies past the end of `data` is skipped with a
+		warning. Raises ``ttLib.TTLibError`` if a record's stored length is
+		greater than 0x8000.
+		"""
+		totalLength = len(data)
+		indextable = ttFont[self.indextable]
+		# First pass handles the glyph records, second pass the 'extra' ones.
+		for indices, isExtra in zip(
+				(indextable.indices, indextable.extra_indices), (False, True)):
+			programs = {}
+			for i, (glyphID, textLength, textOffset) in enumerate(indices):
+				if isExtra:
+					name = self.extras[glyphID]
 				else:
-					textLength = indextable.extra_indices[i+1][1]
-			text = data[textOffset:textOffset+textLength]
-			assert len(text) == textLength
-			if text:
-				self.extraPrograms[self.extras[extraCode]] = text
+					name = ttFont.getGlyphName(glyphID)
+				if textOffset > totalLength:
+					self.log.warning("textOffset > totalLength; %r skipped" % name)
+					continue
+				if textLength < 0x8000:
+					# If the length stored in the record is less than 32768, then use
+					# that as the length of the record.
+					pass
+				elif textLength == 0x8000:
+					# If the length is 32768, compute the actual length as follows:
+					isLast = i == (len(indices)-1)
+					if isLast:
+						if isExtra:
+							# For the last "extra" record (the very last record of the
+							# table), the length is the difference between the total
+							# length of the TSI1 table and the textOffset of the final
+							# record.
+							nextTextOffset = totalLength
+						else:
+							# For the last "normal" record (the last record just prior
+							# to the record containing the "magic number"), the length
+							# is the difference between the textOffset of the record
+							# following the "magic number" (0xFFFE) record (i.e. the
+							# first "extra" record), and the textOffset of the last
+							# "normal" record.
+							nextTextOffset = indextable.extra_indices[0][2]
+					else:
+						# For all other records with a length of 0x8000, the length is
+						# the difference between the textOffset of the record in
+						# question and the textOffset of the next record.
+						nextTextOffset = indices[i+1][2]
+					assert nextTextOffset >= textOffset, "entries not sorted by offset"
+					if nextTextOffset > totalLength:
+						self.log.warning(
+							"nextTextOffset > totalLength; %r truncated" % name)
+						nextTextOffset = totalLength
+					textLength = nextTextOffset - textOffset
+				else:
+					from fontTools import ttLib
+					raise ttLib.TTLibError(
+						"%r textLength (%d) must not be > 32768" % (name, textLength))
+				text = data[textOffset:textOffset+textLength]
+				assert len(text) == textLength
+				if text:
+					programs[name] = text
+			if isExtra:
+				self.extraPrograms = programs
+			else:
+				self.glyphPrograms = programs

 	def compile(self, ttFont):
 		if not hasattr(self, "glyphPrograms"):
@ -55,7 +99,7 @@ class table_T_S_I__1(DefaultTable.DefaultTable):
 				text = b""
 			textLength = len(text)
 			if textLength >= 0x8000:
-				textLength = 0x8000  # XXX ???
+				textLength = 0x8000
 			indices.append((i, textLength, len(data)))
 			data = data + text

@ -71,7 +115,7 @@ class table_T_S_I__1(DefaultTable.DefaultTable):
 				text = b""
 			textLength = len(text)
 			if textLength >= 0x8000:
-				textLength = 0x8000  # XXX ???
+				textLength = 0x8000
 			extra_indices.append((code, textLength, len(data)))
 			data = data + text
 		indextable.set(indices, extra_indices)
--- a/Lib/fontTools/ttLib/tables/T_S_I__2.py
+++ b/Lib/fontTools/ttLib/tables/T_S_I__2.py
@ -1,3 +1,10 @@
+""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
+tool to store its hinting source data.
+
+TSI2 is the index table containing the lengths and offsets for the glyph
+programs that are contained in the TSI3 table. It uses the same format as
+the TSI0 table.
+"""
 from __future__ import print_function, division, absolute_import
 from fontTools.misc.py23 import *
 from fontTools import ttLib
--- a/Lib/fontTools/ttLib/tables/T_S_I__3.py
+++ b/Lib/fontTools/ttLib/tables/T_S_I__3.py
@ -1,3 +1,8 @@
+""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
+tool to store its hinting source data.
+
+TSI3 contains the text of the glyph programs in the form of 'VTTTalk' code.
+"""
 from __future__ import print_function, division, absolute_import
 from fontTools.misc.py23 import *
 from fontTools import ttLib
--- a/Lib/fontTools/ttLib/tables/T_S_I__5.py
+++ b/Lib/fontTools/ttLib/tables/T_S_I__5.py
@ -1,3 +1,8 @@
+""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
+tool to store its hinting source data.
+
+TSI5 contains the VTT character groups.
+"""
 from __future__ import print_function, division, absolute_import
 from fontTools.misc.py23 import *
 from fontTools.misc.textTools import safeEval
--- a/Tests/ttLib/tables/T_S_I__0_test.py
+++ b/Tests/ttLib/tables/T_S_I__0_test.py
@ -0,0 +1,106 @@
+from __future__ import print_function, division, absolute_import
+from fontTools.misc.py23 import SimpleNamespace
+from fontTools.misc.textTools import deHexStr
+from fontTools.misc.testTools import getXML
+from fontTools.ttLib.tables.T_S_I__0 import table_T_S_I__0
+import pytest
+
+
+# (gid, length, offset) for glyph programs
+TSI0_INDICES = [
+    (0, 1, 0),
+    (1, 5, 1),
+    (2, 0, 1),
+    (3, 0, 1),
+    (4, 8, 6)]
+
+# (type, length, offset) for 'extra' programs
+TSI0_EXTRA_INDICES = [
+    (0xFFFA, 2, 14),          # ppgm
+    (0xFFFB, 4, 16),          # cvt
+    (0xFFFC, 6, 20),          # reserved
+    (0xFFFD, 10, 26)]         # fpgm
+
+# compiled TSI0 table from data above
+TSI0_DATA = deHexStr(
+    "0000 0001 00000000"
+    "0001 0005 00000001"
+    "0002 0000 00000001"
+    "0003 0000 00000001"
+    "0004 0008 00000006"
+    "FFFE 0000 ABFC1F34"      # 'magic' separates glyph from extra programs
+    "FFFA 0002 0000000E"
+    "FFFB 0004 00000010"
+    "FFFC 0006 00000014"
+    "FFFD 000A 0000001A")
+
+# empty font has no glyph programs but 4 extra programs are always present
+EMPTY_TSI0_EXTRA_INDICES = [
+    (0xFFFA, 0, 0),
+    (0xFFFB, 0, 0),
+    (0xFFFC, 0, 0),
+    (0xFFFD, 0, 0)]
+
+EMPTY_TSI0_DATA = deHexStr(
+    "FFFE 0000 ABFC1F34"
+    "FFFA 0000 00000000"
+    "FFFB 0000 00000000"
+    "FFFC 0000 00000000"
+    "FFFD 0000 00000000")
+
+
+@pytest.fixture
+def table():
+    """Return a newly constructed ``table_T_S_I__0``."""
+    return table_T_S_I__0()
+
+
+@pytest.mark.parametrize(
+    "numGlyphs, data, expected_indices, expected_extra_indices",
+    [
+        (5, TSI0_DATA, TSI0_INDICES, TSI0_EXTRA_INDICES),
+        (0, EMPTY_TSI0_DATA, [], EMPTY_TSI0_EXTRA_INDICES)
+    ],
+    ids=["simple", "empty"]
+)
+def test_decompile(table, numGlyphs, data, expected_indices,
+                   expected_extra_indices):
+    """Decompiling TSI0 bytes yields the expected glyph and extra indices."""
+    font = {'maxp': SimpleNamespace(numGlyphs=numGlyphs)}
+
+    table.decompile(data, font)
+
+    assert len(table.indices) == numGlyphs
+    assert table.indices == expected_indices
+    assert len(table.extra_indices) == 4
+    assert table.extra_indices == expected_extra_indices
+
+
+@pytest.mark.parametrize(
+    "numGlyphs, indices, extra_indices, expected_data",
+    [
+        (5, TSI0_INDICES, TSI0_EXTRA_INDICES, TSI0_DATA),
+        (0, [], EMPTY_TSI0_EXTRA_INDICES, EMPTY_TSI0_DATA)
+    ],
+    ids=["simple", "empty"]
+)
+def test_compile(table, numGlyphs, indices, extra_indices, expected_data):
+    """Compiling gives empty bytes before ``set()``, the expected data after."""
+    assert table.compile(ttFont=None) == b""
+
+    table.set(indices, extra_indices)
+    data = table.compile(ttFont=None)
+    assert data == expected_data
+
+
+def test_set(table):
+    """``set()`` stores both index lists on the table."""
+    table.set(TSI0_INDICES, TSI0_EXTRA_INDICES)
+    assert table.indices == TSI0_INDICES
+    assert table.extra_indices == TSI0_EXTRA_INDICES
+
+
+def test_toXML(table):
+    """``toXML`` emits only the 'calculated by the compiler' comment."""
+    assert getXML(table.toXML, ttFont=None) == [
+        '<!-- This table will be calculated by the compiler -->']
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(pytest.main(sys.argv))
--- a/Tests/ttLib/tables/T_S_I__1_test.py
+++ b/Tests/ttLib/tables/T_S_I__1_test.py
@ -0,0 +1,162 @@
+from __future__ import print_function, division, absolute_import
+from fontTools.misc.py23 import unichr
+from fontTools.misc.loggingTools import CapturingLogHandler
+from fontTools.ttLib import TTFont, TTLibError
+from fontTools.ttLib.tables.T_S_I__0 import table_T_S_I__0
+from fontTools.ttLib.tables.T_S_I__1 import table_T_S_I__1
+import pytest
+
+
+TSI1_DATA = b"""abcdefghijklmnopqrstuvxywz0123456789"""
+
+
+@pytest.fixture
+def indextable():
+    """Return a TSI0 table whose records describe slices of TSI1_DATA."""
+    table = table_T_S_I__0()
+    table.set(
+        [(0, 1, 0),         # gid 0, length=1, offset=0, text='a'
+         (1, 5, 1),         # gid 1, length=5, offset=1, text='bcdef'
+         (2, 0, 1),         # gid 2, length=0, offset=1, text=''
+         (3, 0, 1),         # gid 3, length=0, offset=1, text=''
+         (4, 8, 6)],        # gid 4, length=8, offset=6, text='ghijklmn'
+        [(0xFFFA, 2, 14),   # 'ppgm', length=2, offset=14, text='op'
+         (0xFFFB, 4, 16),   # 'cvt', length=4, offset=16, text='qrst'
+         (0xFFFC, 6, 20),   # 'reserved', length=6, offset=20, text='uvxywz'
+         (0xFFFD, 10, 26)]  # 'fpgm', length=10, offset=26, text='0123456789'
+    )
+    return table
+
+
+@pytest.fixture
+def font(indextable):
+    """Return a TTFont with one glyph name per index record and 'TSI0' set."""
+    font = TTFont()
+    # ['a', 'b', 'c', ...]
+    ch = 0x61
+    n = len(indextable.indices)
+    font.glyphOrder = [unichr(i) for i in range(ch, ch+n)]
+    font['TSI0'] = indextable
+    return font
+
+
+@pytest.fixture
+def empty_font():
+    """Return a glyph-less TTFont whose 'TSI0' has four empty extra records."""
+    font = TTFont()
+    font.glyphOrder = []
+    indextable = table_T_S_I__0()
+    indextable.set([], [(0xFFFA, 0, 0),
+                        (0xFFFB, 0, 0),
+                        (0xFFFC, 0, 0),
+                        (0xFFFD, 0, 0)])
+    font['TSI0'] = indextable
+    return font
+
+
+def test_decompile(font):
+    """Each indexed slice of TSI1_DATA is stored under its program name."""
+    table = table_T_S_I__1()
+    table.decompile(TSI1_DATA, font)
+
+    assert table.glyphPrograms == {
+        'a': b'a',
+        'b': b'bcdef',
+        # 'c': b'',  # zero-length entries are skipped
+        # 'd': b'',
+        'e': b'ghijklmn'}
+    assert table.extraPrograms == {
+        'ppgm': b'op',
+        'cvt': b'qrst',
+        'reserved': b'uvxywz',
+        'fpgm': b'0123456789'}
+
+
+def test_decompile_empty(empty_font):
+    """Empty data with all-zero index records yields no programs."""
+    table = table_T_S_I__1()
+    table.decompile(b"", empty_font)
+
+    assert table.glyphPrograms == {}
+    assert table.extraPrograms == {}
+
+
+def test_decompile_invalid_length(empty_font):
+    """A stored length above 0x8000 is rejected with TTLibError."""
+    empty_font.glyphOrder = ['a']
+    empty_font['TSI0'].indices = [(0, 0x8000+1, 0)]
+
+    table = table_T_S_I__1()
+    with pytest.raises(TTLibError) as excinfo:
+        table.decompile(b'', empty_font)
+    assert excinfo.match("textLength .* must not be > 32768")
+
+
+def test_decompile_offset_past_end(empty_font):
+    """A record whose offset is past the end of the data is skipped."""
+    empty_font.glyphOrder = ['foo', 'bar']
+    data = b'baz'
+    empty_font['TSI0'].indices = [(0, len(data), 0), (1, 1, len(data)+1)]
+
+    table = table_T_S_I__1()
+    with CapturingLogHandler(table.log, "WARNING") as captor:
+        table.decompile(data, empty_font)
+
+    # the 'bar' program is skipped because its offset > len(data)
+    assert table.glyphPrograms == {'foo': b'baz'}
+    # same helper the other warning check in this file uses
+    captor.assertRegex("textOffset > totalLength")
+
+
+def test_decompile_magic_length_last_extra(empty_font):
+    """Length 0x8000 on the final extra record runs to the end of the data."""
+    indextable = empty_font['TSI0']
+    indextable.extra_indices[-1] = (0xFFFD, 0x8000, 0)
+    data = b"0" * (0x8000 + 1)
+
+    table = table_T_S_I__1()
+    table.decompile(data, empty_font)
+
+    assert table.extraPrograms['fpgm'] == data
+
+
+def test_decompile_magic_length_last_glyph(empty_font):
+    """Length 0x8000 on the last glyph runs to the first extra's offset."""
+    empty_font.glyphOrder = ['foo', 'bar']
+    indextable = empty_font['TSI0']
+    indextable.indices = [
+        (0, 3, 0),
+        (1, 0x8000, 3)]           # the actual length of 'bar' program is
+    indextable.extra_indices = [  # the difference between the first extra's
+        (0xFFFA, 0, 0x8004),      # offset and 'bar' offset: 0x8004 - 3
+        (0xFFFB, 0, 0x8004),
+        (0xFFFC, 0, 0x8004),
+        (0xFFFD, 0, 0x8004)]
+    foo_data = b"0" * 3
+    bar_data = b"1" * (0x8000 + 1)
+    data = foo_data + bar_data
+
+    table = table_T_S_I__1()
+    table.decompile(data, empty_font)
+
+    assert table.glyphPrograms['foo'] == foo_data
+    assert table.glyphPrograms['bar'] == bar_data
+
+
+def test_decompile_magic_length_non_last(empty_font):
+    """Length 0x8000 on a non-final record runs to the next record's offset.
+
+    When that offset is past the end of the data, the text is truncated and
+    a warning is logged.
+    """
+    indextable = empty_font['TSI0']
+    indextable.extra_indices = [
+        (0xFFFA, 3, 0),
+        (0xFFFB, 0x8000, 3),  # the actual length of 'cvt' program is:
+        (0xFFFC, 0, 0x8004),  # nextTextOffset - textOffset: 0x8004 - 3
+        (0xFFFD, 0, 0x8004)]
+    ppgm_data = b"0" * 3
+    cvt_data = b"1" * (0x8000 + 1)
+    data = ppgm_data + cvt_data
+
+    table = table_T_S_I__1()
+    table.decompile(data, empty_font)
+
+    assert table.extraPrograms['ppgm'] == ppgm_data
+    assert table.extraPrograms['cvt'] == cvt_data
+
+    table = table_T_S_I__1()
+    with CapturingLogHandler(table.log, "WARNING") as captor:
+        table.decompile(data[:-1], empty_font)  # last entry is truncated
+    captor.assertRegex("nextTextOffset > totalLength")
+    assert table.extraPrograms['cvt'] == cvt_data[:-1]
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(pytest.main(sys.argv))