Merge pull request #913 from anthrotype/vtt-tsi-length

[TSI*] Fix computation of the textLength of VTT private tables entries
This commit is contained in:
Cosimo Lupo 2017-04-11 10:20:36 +01:00 committed by GitHub
commit 7de51cff57
9 changed files with 393 additions and 31 deletions

View File

@ -463,6 +463,13 @@ class CapturingLogHandler(logging.Handler):
def createLock(self):
    """Store ``None`` as this handler's lock instead of creating a lock object."""
    self.lock = None
def assertRegex(self, regexp):
    """Check that at least one captured record's message matches *regexp*.

    *regexp* is compiled with ``re.compile`` and searched (unanchored)
    against the ``msg`` attribute of each record in ``self.records``.

    Returns ``True`` as soon as a record matches.
    Raises ``AssertionError`` if no record matches.
    """
    import re

    pattern = re.compile(regexp)
    if any(pattern.search(record.msg) for record in self.records):
        return True
    # Raise explicitly instead of ``assert 0``: assert statements are removed
    # when Python runs with -O, which would turn a failed check into a silent
    # ``None`` return.
    raise AssertionError("Pattern '%s' not found in logger records" % regexp)
def deprecateArgument(name, msg, category=UserWarning):
""" Raise a warning about deprecated function argument 'name'. """

View File

@ -477,6 +477,25 @@ if not hasattr(logging, 'lastResort'):
logging.setLoggerClass(_Logger)
try:
    from types import SimpleNamespace
except ImportError:
    class SimpleNamespace(object):
        """
        A backport of Python 3.3's ``types.SimpleNamespace``.

        Keyword arguments given to the constructor become instance
        attributes.  Two namespaces compare equal when their attribute
        dictionaries are equal.
        """

        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

        def __repr__(self):
            # Sort the attribute names so the output is deterministic.
            keys = sorted(self.__dict__)
            items = ("{0}={1!r}".format(k, self.__dict__[k]) for k in keys)
            return "{0}({1})".format(type(self).__name__, ", ".join(items))

        def __eq__(self, other):
            # Only compare against other namespaces; for anything else let
            # Python try the reflected operation instead of raising
            # AttributeError on objects that have no ``__dict__``.
            if not isinstance(other, SimpleNamespace):
                return NotImplemented
            return self.__dict__ == other.__dict__

        def __ne__(self, other):
            # This class is only defined when ``types.SimpleNamespace`` is
            # missing, i.e. on interpreters where ``!=`` is not derived
            # from ``__eq__`` automatically.
            result = self.__eq__(other)
            if result is NotImplemented:
                return result
            return not result
if __name__ == "__main__":
    # Run this module's doctests and exit with the number of failures.
    import doctest
    import sys

    results = doctest.testmod()
    sys.exit(results.failed)

View File

@ -1,9 +1,16 @@
""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
tool to store its hinting source data.
TSI0 is the index table containing the lengths and offsets for the glyph
programs and 'extra' programs ('fpgm', 'prep', and 'cvt') that are contained
in the TSI1 table.
"""
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
from . import DefaultTable
import struct
tsi0Format = '>HHl'
tsi0Format = '>HHL'
def fixlongs(glyphID, textLength, textOffset):
    """Return the index triple with the first two fields converted by ``int()``.

    ``glyphID`` and ``textLength`` are passed through ``int()``;
    ``textOffset`` is returned unchanged.
    """
    glyphID = int(glyphID)
    textLength = int(textLength)
    return glyphID, textLength, textOffset
@ -22,7 +29,7 @@ class table_T_S_I__0(DefaultTable.DefaultTable):
indices.append((glyphID, textLength, textOffset))
data = data[size:]
assert len(data) == 0
assert indices[-5] == (0XFFFE, 0, -1409540300), "bad magic number" # 0xABFC1F34
assert indices[-5] == (0XFFFE, 0, 0xABFC1F34), "bad magic number"
self.indices = indices[:-5]
self.extra_indices = indices[-4:]
@ -30,11 +37,11 @@ class table_T_S_I__0(DefaultTable.DefaultTable):
if not hasattr(self, "indices"):
# We have no corresponding table (TSI1 or TSI3); let's return
# no data, which effectively means "ignore us".
return ""
return b""
data = b""
for index, textLength, textOffset in self.indices:
data = data + struct.pack(tsi0Format, index, textLength, textOffset)
data = data + struct.pack(tsi0Format, 0XFFFE, 0, -1409540300) # 0xABFC1F34
data = data + struct.pack(tsi0Format, 0XFFFE, 0, 0xABFC1F34)
for index, textLength, textOffset in self.extra_indices:
data = data + struct.pack(tsi0Format, index, textLength, textOffset)
return data

View File

@ -1,6 +1,14 @@
""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
tool to store its hinting source data.
TSI1 contains the text of the glyph programs in the form of low-level assembly
code, as well as the 'extra' programs 'fpgm', 'ppgm' (i.e. 'prep'), and 'cvt'.
"""
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
from . import DefaultTable
import logging
class table_T_S_I__1(DefaultTable.DefaultTable):
@ -8,33 +16,69 @@ class table_T_S_I__1(DefaultTable.DefaultTable):
indextable = "TSI0"
def decompile(self, data, ttFont):
indextable = ttFont[self.indextable]
self.glyphPrograms = {}
for i in range(len(indextable.indices)):
glyphID, textLength, textOffset = indextable.indices[i]
if textLength == 0x8000:
# Ugh. Hi Beat!
textLength = indextable.indices[i+1][1]
if textLength > 0x8000:
pass # XXX Hmmm.
text = data[textOffset:textOffset+textLength]
assert len(text) == textLength
if text:
self.glyphPrograms[ttFont.getGlyphName(glyphID)] = text
def __init__(self, tag=None):
    """Initialise the table and attach a logger named after the class's module.

    *tag* is forwarded unchanged to the parent class initialiser.
    """
    super(table_T_S_I__1, self).__init__(tag)
    logger_name = self.__class__.__module__
    self.log = logging.getLogger(logger_name)
self.extraPrograms = {}
for i in range(len(indextable.extra_indices)):
extraCode, textLength, textOffset = indextable.extra_indices[i]
if textLength == 0x8000:
if self.extras[extraCode] == "fpgm": # this is the last one
textLength = len(data) - textOffset
def decompile(self, data, ttFont):
totalLength = len(data)
indextable = ttFont[self.indextable]
for indices, isExtra in zip(
(indextable.indices, indextable.extra_indices), (False, True)):
programs = {}
for i, (glyphID, textLength, textOffset) in enumerate(indices):
if isExtra:
name = self.extras[glyphID]
else:
textLength = indextable.extra_indices[i+1][1]
text = data[textOffset:textOffset+textLength]
assert len(text) == textLength
if text:
self.extraPrograms[self.extras[extraCode]] = text
name = ttFont.getGlyphName(glyphID)
if textOffset > totalLength:
self.log.warning("textOffset > totalLength; %r skipped" % name)
continue
if textLength < 0x8000:
# If the length stored in the record is less than 32768, then use
# that as the length of the record.
pass
elif textLength == 0x8000:
# If the length is 32768, compute the actual length as follows:
isLast = i == (len(indices)-1)
if isLast:
if isExtra:
# For the last "extra" record (the very last record of the
# table), the length is the difference between the total
# length of the TSI1 table and the textOffset of the final
# record.
nextTextOffset = totalLength
else:
# For the last "normal" record (the last record just prior
# to the record containing the "magic number"), the length
# is the difference between the textOffset of the record
# following the "magic number" (0xFFFE) record (i.e. the
# first "extra" record), and the textOffset of the last
# "normal" record.
nextTextOffset = indextable.extra_indices[0][2]
else:
# For all other records with a length of 0x8000, the length is
# the difference between the textOffset of the record in
# question and the textOffset of the next record.
nextTextOffset = indices[i+1][2]
assert nextTextOffset >= textOffset, "entries not sorted by offset"
if nextTextOffset > totalLength:
self.log.warning(
"nextTextOffset > totalLength; %r truncated" % name)
nextTextOffset = totalLength
textLength = nextTextOffset - textOffset
else:
from fontTools import ttLib
raise ttLib.TTLibError(
"%r textLength (%d) must not be > 32768" % (name, textLength))
text = data[textOffset:textOffset+textLength]
assert len(text) == textLength
if text:
programs[name] = text
if isExtra:
self.extraPrograms = programs
else:
self.glyphPrograms = programs
def compile(self, ttFont):
if not hasattr(self, "glyphPrograms"):
@ -55,7 +99,7 @@ class table_T_S_I__1(DefaultTable.DefaultTable):
text = b""
textLength = len(text)
if textLength >= 0x8000:
textLength = 0x8000 # XXX ???
textLength = 0x8000
indices.append((i, textLength, len(data)))
data = data + text
@ -71,7 +115,7 @@ class table_T_S_I__1(DefaultTable.DefaultTable):
text = b""
textLength = len(text)
if textLength >= 0x8000:
textLength = 0x8000 # XXX ???
textLength = 0x8000
extra_indices.append((code, textLength, len(data)))
data = data + text
indextable.set(indices, extra_indices)

View File

@ -1,3 +1,10 @@
""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
tool to store its hinting source data.
TSI2 is the index table containing the lengths and offsets for the glyph
programs that are contained in the TSI3 table. It uses the same format as
the TSI0 table.
"""
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
from fontTools import ttLib

View File

@ -1,3 +1,8 @@
""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
tool to store its hinting source data.
TSI3 contains the text of the glyph programs in the form of 'VTTTalk' code.
"""
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
from fontTools import ttLib

View File

@ -1,3 +1,8 @@
""" TSI{0,1,2,3,5} are private tables used by Microsoft Visual TrueType (VTT)
tool to store its hinting source data.
TSI5 contains the VTT character groups.
"""
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import *
from fontTools.misc.textTools import safeEval

View File

@ -0,0 +1,106 @@
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import SimpleNamespace
from fontTools.misc.textTools import deHexStr
from fontTools.misc.testTools import getXML
from fontTools.ttLib.tables.T_S_I__0 import table_T_S_I__0
import pytest
# Index records for glyph programs, one (gid, length, offset) tuple per glyph.
TSI0_INDICES = [
(0, 1, 0),
(1, 5, 1),
(2, 0, 1),
(3, 0, 1),
(4, 8, 6)]
# Index records for the 'extra' programs, as (type, length, offset) tuples.
# The type code identifies which extra program the record describes.
TSI0_EXTRA_INDICES = [
(0xFFFA, 2, 14), # ppgm
(0xFFFB, 4, 16), # cvt
(0xFFFC, 6, 20), # reserved
(0xFFFD, 10, 26)] # fpgm
# Compiled TSI0 table for the two lists above. Each row below is one record
# written in hex: a 2-byte id, a 2-byte length and a 4-byte offset.
TSI0_DATA = deHexStr(
"0000 0001 00000000"
"0001 0005 00000001"
"0002 0000 00000001"
"0003 0000 00000001"
"0004 0008 00000006"
"FFFE 0000 ABFC1F34" # 'magic' record separating glyph from extra programs
"FFFA 0002 0000000E"
"FFFB 0004 00000010"
"FFFC 0006 00000014"
"FFFD 000A 0000001A")
# An empty font has no glyph programs, but the 4 extra records are always
# present (here all with length 0 and offset 0).
EMPTY_TSI0_EXTRA_INDICES = [
(0xFFFA, 0, 0),
(0xFFFB, 0, 0),
(0xFFFC, 0, 0),
(0xFFFD, 0, 0)]
# Compiled form of the empty case: the 'magic' record followed by the
# four extra records.
EMPTY_TSI0_DATA = deHexStr(
"FFFE 0000 ABFC1F34"
"FFFA 0000 00000000"
"FFFB 0000 00000000"
"FFFC 0000 00000000"
"FFFD 0000 00000000")
@pytest.fixture
def table():
    """Provide a newly constructed ``table_T_S_I__0`` instance."""
    instance = table_T_S_I__0()
    return instance
@pytest.mark.parametrize(
    "numGlyphs, data, expected_indices, expected_extra_indices",
    [
        (5, TSI0_DATA, TSI0_INDICES, TSI0_EXTRA_INDICES),
        (0, EMPTY_TSI0_DATA, [], EMPTY_TSI0_EXTRA_INDICES)
    ],
    ids=["simple", "empty"]
)
def test_decompile(table, numGlyphs, data, expected_indices,
                   expected_extra_indices):
    """Decompiling binary TSI0 data yields the expected index lists."""
    # A plain dict with a 'maxp' entry stands in for the font object.
    maxp = SimpleNamespace(numGlyphs=numGlyphs)
    fake_font = {'maxp': maxp}

    table.decompile(data, fake_font)

    glyph_records = table.indices
    assert len(glyph_records) == numGlyphs
    assert glyph_records == expected_indices

    extra_records = table.extra_indices
    assert len(extra_records) == 4
    assert extra_records == expected_extra_indices
@pytest.mark.parametrize(
    "numGlyphs, indices, extra_indices, expected_data",
    [
        (5, TSI0_INDICES, TSI0_EXTRA_INDICES, TSI0_DATA),
        (0, [], EMPTY_TSI0_EXTRA_INDICES, EMPTY_TSI0_DATA)
    ],
    ids=["simple", "empty"]
)
def test_compile(table, numGlyphs, indices, extra_indices, expected_data):
    """Compiling gives empty bytes before ``set()`` and the expected data after."""
    # Before any indices are set, compile() is expected to return no data.
    before_set = table.compile(ttFont=None)
    assert before_set == b""

    table.set(indices, extra_indices)
    compiled = table.compile(ttFont=None)
    assert compiled == expected_data
def test_set(table):
    """``set()`` stores both index lists on the table."""
    table.set(TSI0_INDICES, TSI0_EXTRA_INDICES)
    stored_indices = table.indices
    assert stored_indices == TSI0_INDICES
    stored_extras = table.extra_indices
    assert stored_extras == TSI0_EXTRA_INDICES
def test_toXML(table):
    """The XML dump of the table consists of a single explanatory comment."""
    expected = ['<!-- This table will be calculated by the compiler -->']
    xml_lines = getXML(table.toXML, ttFont=None)
    assert xml_lines == expected
if __name__ == "__main__":
    # Allow running this test module directly; exit with pytest's status.
    import sys

    exit_code = pytest.main(sys.argv)
    sys.exit(exit_code)

View File

@ -0,0 +1,162 @@
from __future__ import print_function, division, absolute_import
from fontTools.misc.py23 import unichr
from fontTools.misc.loggingTools import CapturingLogHandler
from fontTools.ttLib import TTFont, TTLibError
from fontTools.ttLib.tables.T_S_I__0 import table_T_S_I__0
from fontTools.ttLib.tables.T_S_I__1 import table_T_S_I__1
import pytest
# Raw bytes passed to table_T_S_I__1.decompile() in test_decompile. The
# (length, offset) pairs in the `indextable` fixture slice into this string.
# Note the letter run ends 'uvxywz' (not strictly alphabetical); the expected
# values in the tests are spelled the same way.
TSI1_DATA = b"""abcdefghijklmnopqrstuvxywz0123456789"""
@pytest.fixture
def indextable():
    """Build a TSI0 index table whose records slice ``TSI1_DATA``."""
    glyph_records = [
        (0, 1, 0),   # gid 0, length=1, offset=0, text='a'
        (1, 5, 1),   # gid 1, length=5, offset=1, text='bcdef'
        (2, 0, 1),   # gid 2, length=0, offset=1, text=''
        (3, 0, 1),   # gid 3, length=0, offset=1, text=''
        (4, 8, 6),   # gid 4, length=8, offset=6, text='ghijklmn'
    ]
    extra_records = [
        (0xFFFA, 2, 14),   # 'ppgm', length=2, offset=14, text='op'
        (0xFFFB, 4, 16),   # 'cvt', length=4, offset=16, text='qrst'
        (0xFFFC, 6, 20),   # 'reserved', length=6, offset=20, text='uvxywz'
        (0xFFFD, 10, 26),  # 'fpgm', length=10, offset=26, text='0123456789'
    ]
    index = table_T_S_I__0()
    index.set(glyph_records, extra_records)
    return index
@pytest.fixture
def font(indextable):
    """Build a ``TTFont`` with one single-letter glyph name per index record."""
    ttfont = TTFont()
    # Glyph names are consecutive letters starting at 'a': ['a', 'b', 'c', ...]
    first_codepoint = 0x61
    glyph_count = len(indextable.indices)
    last_codepoint = first_codepoint + glyph_count
    ttfont.glyphOrder = [
        unichr(codepoint)
        for codepoint in range(first_codepoint, last_codepoint)
    ]
    ttfont['TSI0'] = indextable
    return ttfont
@pytest.fixture
def empty_font():
    """Build a ``TTFont`` with no glyphs and four zero-length extra records."""
    ttfont = TTFont()
    ttfont.glyphOrder = []
    index = table_T_S_I__0()
    # No glyph records; every extra record has length 0 at offset 0.
    zero_length_extras = [
        (0xFFFA, 0, 0),
        (0xFFFB, 0, 0),
        (0xFFFC, 0, 0),
        (0xFFFD, 0, 0),
    ]
    index.set([], zero_length_extras)
    ttfont['TSI0'] = index
    return ttfont
def test_decompile(font):
    """Decompiling ``TSI1_DATA`` splits it into glyph and extra programs."""
    expected_glyph_programs = {
        'a': b'a',
        'b': b'bcdef',
        # 'c': b'',  # zero-length entries are skipped
        # 'd': b'',
        'e': b'ghijklmn',
    }
    expected_extra_programs = {
        'ppgm': b'op',
        'cvt': b'qrst',
        'reserved': b'uvxywz',
        'fpgm': b'0123456789',
    }

    tsi1 = table_T_S_I__1()
    tsi1.decompile(TSI1_DATA, font)

    assert tsi1.glyphPrograms == expected_glyph_programs
    assert tsi1.extraPrograms == expected_extra_programs
def test_decompile_empty(empty_font):
    """Decompiling empty data against an empty index yields no programs."""
    tsi1 = table_T_S_I__1()
    tsi1.decompile(b"", empty_font)
    no_programs = {}
    assert tsi1.glyphPrograms == no_programs
    assert tsi1.extraPrograms == no_programs
def test_decompile_invalid_length(empty_font):
    """A record length above 0x8000 makes ``decompile`` raise ``TTLibError``."""
    too_long = 0x8000+1
    empty_font.glyphOrder = ['a']
    empty_font['TSI0'].indices = [(0, too_long, 0)]

    tsi1 = table_T_S_I__1()
    with pytest.raises(TTLibError) as excinfo:
        tsi1.decompile(b'', empty_font)
    # Checked after the block: code following the raising call inside the
    # ``with`` body would never run.
    assert excinfo.match("textLength .* must not be > 32768")
def test_decompile_offset_past_end(empty_font):
    """A record whose offset lies beyond the data is skipped with a warning."""
    payload = b'baz'
    size = len(payload)
    empty_font.glyphOrder = ['foo', 'bar']
    empty_font['TSI0'].indices = [(0, size, 0), (1, 1, size+1)]

    tsi1 = table_T_S_I__1()
    with CapturingLogHandler(tsi1.log, "WARNING") as captor:
        tsi1.decompile(payload, empty_font)

    # the 'bar' program is skipped because its offset > len(data)
    assert tsi1.glyphPrograms == {'foo': b'baz'}
    warned = any(
        "textOffset > totalLength" in record.msg for record in captor.records)
    assert warned
def test_decompile_magic_length_last_extra(empty_font):
    """A last extra record with length 0x8000 extends to the end of the data."""
    index = empty_font['TSI0']
    # Replace the final extra record ('fpgm') with one using the 0x8000 length.
    index.extra_indices[-1] = (0xFFFD, 0x8000, 0)
    payload = b"0" * (0x8000 + 1)

    tsi1 = table_T_S_I__1()
    tsi1.decompile(payload, empty_font)

    assert tsi1.extraPrograms['fpgm'] == payload
def test_decompile_magic_length_last_glyph(empty_font):
    """A last glyph record with length 0x8000 runs up to the first extra's offset."""
    empty_font.glyphOrder = ['foo', 'bar']
    index = empty_font['TSI0']
    index.indices = [
        (0, 3, 0),
        (1, 0x8000, 3),
    ]
    # The actual length of the 'bar' program is the difference between the
    # first extra's offset and the 'bar' offset: 0x8004 - 3
    index.extra_indices = [
        (0xFFFA, 0, 0x8004),
        (0xFFFB, 0, 0x8004),
        (0xFFFC, 0, 0x8004),
        (0xFFFD, 0, 0x8004),
    ]
    foo_bytes = b"0" * 3
    bar_bytes = b"1" * (0x8000 + 1)
    payload = foo_bytes + bar_bytes

    tsi1 = table_T_S_I__1()
    tsi1.decompile(payload, empty_font)

    decoded = tsi1.glyphPrograms
    assert decoded['foo'] == foo_bytes
    assert decoded['bar'] == bar_bytes
def test_decompile_magic_length_non_last(empty_font):
    """A non-last record with length 0x8000 runs up to the next record's offset."""
    index = empty_font['TSI0']
    # The actual length of the 'cvt' program is
    # nextTextOffset - textOffset: 0x8004 - 3
    index.extra_indices = [
        (0xFFFA, 3, 0),
        (0xFFFB, 0x8000, 3),
        (0xFFFC, 0, 0x8004),
        (0xFFFD, 0, 0x8004),
    ]
    ppgm_bytes = b"0" * 3
    cvt_bytes = b"1" * (0x8000 + 1)
    payload = ppgm_bytes + cvt_bytes

    # Complete data: both programs come back whole.
    tsi1 = table_T_S_I__1()
    tsi1.decompile(payload, empty_font)
    assert tsi1.extraPrograms['ppgm'] == ppgm_bytes
    assert tsi1.extraPrograms['cvt'] == cvt_bytes

    # Data one byte short: the last entry is truncated and a warning is logged.
    tsi1 = table_T_S_I__1()
    shortened = payload[:-1]
    with CapturingLogHandler(tsi1.log, "WARNING") as captor:
        tsi1.decompile(shortened, empty_font)
    captor.assertRegex("nextTextOffset > totalLength")
    assert tsi1.extraPrograms['cvt'] == cvt_bytes[:-1]
if __name__ == "__main__":
    # Allow running this test module directly; exit with pytest's status.
    import sys

    exit_code = pytest.main(sys.argv)
    sys.exit(exit_code)