From d767061e8d5ed5d3ec929c4239cdcd6344a2bf68 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Mon, 18 May 2020 11:33:50 +0100 Subject: [PATCH 1/6] sfnt: add __getstate__ to SFNTReader to make it pickelable Fixes https://github.com/fonttools/fonttools/issues/1962 --- Lib/fontTools/ttLib/sfnt.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/Lib/fontTools/ttLib/sfnt.py b/Lib/fontTools/ttLib/sfnt.py index 9c45305d5..990308256 100644 --- a/Lib/fontTools/ttLib/sfnt.py +++ b/Lib/fontTools/ttLib/sfnt.py @@ -134,18 +134,34 @@ class SFNTReader(object): obj = cls.__new__(cls) for k, v in self.__dict__.items(): if k == "file": - pos = v.tell() - v.seek(0) - buf = BytesIO(v.read()) - v.seek(pos) - buf.seek(pos) - if hasattr(v, "name"): - buf.name = v.name - obj.file = buf + obj.file = _copy_file_to_bytes_stream(v) else: obj.__dict__[k] = deepcopy(v, memo) return obj + def __getstate__(self): + """Makes the SFNTReader pickleable even when TTFont was loaded with lazy=True. + + Similar to the custom `__deepcopy__` above, we copy the unpickleable file data + to an in-memory bytes stream, which is pickelable and behaves just like a file. + """ + state = self.__dict__.copy() + state["file"] = _copy_file_to_bytes_stream(state["file"]) + return state + + +def _copy_file_to_bytes_stream(f) -> BytesIO: + # Copy bytes from file object to BytesIO stream. + # The returned stream also records the original file `name` and current position. + pos = f.tell() + f.seek(0) + buf = BytesIO(f.read()) + f.seek(pos) + buf.seek(pos) + if hasattr(f, "name"): + buf.name = f.name + return buf + # default compression level for WOFF 1.0 tables and metadata ZLIB_COMPRESSION_LEVEL = 6 From 942fbfe07a5b3d7f6ecccc10447a39f66cc75ff0 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Mon, 18 May 2020 11:57:03 +0100 Subject: [PATCH 2/6] sfnt: custom __deepcopy__ not needed with __getstate__ also, we only need to copy file to stream when self.file is _not_ already an io.BytesIO. --- Lib/fontTools/ttLib/sfnt.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/Lib/fontTools/ttLib/sfnt.py b/Lib/fontTools/ttLib/sfnt.py index 990308256..8ab4cecea 100644 --- a/Lib/fontTools/ttLib/sfnt.py +++ b/Lib/fontTools/ttLib/sfnt.py @@ -12,7 +12,8 @@ classes, since whenever to number of tables changes or whenever a table's length chages you need to rewrite the whole file anyway. """ -from fontTools.misc.py23 import * +from io import BytesIO +from fontTools.misc.py23 import Tag from fontTools.misc import sstruct from fontTools.ttLib import TTLibError import struct @@ -122,29 +123,15 @@ class SFNTReader(object): def close(self): self.file.close() - def __deepcopy__(self, memo): - """Overrides the default deepcopy of SFNTReader object, to make it work - in the case when TTFont is loaded with lazy=True, and thus reader holds a - reference to a file object which is not pickleable. - We work around it by manually copying the data into a in-memory stream. - """ - from copy import deepcopy - - cls = self.__class__ - obj = cls.__new__(cls) - for k, v in self.__dict__.items(): - if k == "file": - obj.file = _copy_file_to_bytes_stream(v) - else: - obj.__dict__[k] = deepcopy(v, memo) - return obj - def __getstate__(self): - """Makes the SFNTReader pickleable even when TTFont was loaded with lazy=True. + """Makes SFNTReader pickle/deepcopy-able even with TTFont loaded as lazy=True. - Similar to the custom `__deepcopy__` above, we copy the unpickleable file data - to an in-memory bytes stream, which is pickelable and behaves just like a file. + If SFNTReader holds a reference to an external file object which is not + pickleable, we copy the file data to an in-memory bytes stream, which is + pickelable and behaves just like a file. """ + if isinstance(self.file, BytesIO): + return self.__dict__ state = self.__dict__.copy() state["file"] = _copy_file_to_bytes_stream(state["file"]) return state From 72f9e7794ad99d8899c6961b6e5f8dac4f1d2269 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Mon, 18 May 2020 12:41:25 +0100 Subject: [PATCH 3/6] SFNTReader: define __getstate__/__setstate__ to reopen external file Instead of copying to BytesIO, we can return the file name in getstate and reopen the file in setstate. This keeps the TTFont truly lazy as it avoids the extra copy --- Lib/fontTools/ttLib/sfnt.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/Lib/fontTools/ttLib/sfnt.py b/Lib/fontTools/ttLib/sfnt.py index 8ab4cecea..5996ec3d8 100644 --- a/Lib/fontTools/ttLib/sfnt.py +++ b/Lib/fontTools/ttLib/sfnt.py @@ -123,31 +123,29 @@ class SFNTReader(object): def close(self): self.file.close() - def __getstate__(self): - """Makes SFNTReader pickle/deepcopy-able even with TTFont loaded as lazy=True. + # We define custom __getstate__ and __setstate__ to make SFNTReader pickle-able + # and deepcopy-able. When a TTFont is loaded as lazy=True, SFNTReader holds a + # reference to an external file object which is not pickleable. So in __getstate__ + # we store the file name and current position, and in __setstate__ we reopen the + # same named file after unpickling. - If SFNTReader holds a reference to an external file object which is not - pickleable, we copy the file data to an in-memory bytes stream, which is - pickelable and behaves just like a file. - """ + def __getstate__(self): if isinstance(self.file, BytesIO): + # BytesIO is already pickleable, return the state unmodified return self.__dict__ + + # remove unpickleable file attribute, and only store its name and pos state = self.__dict__.copy() - state["file"] = _copy_file_to_bytes_stream(state["file"]) + del state["file"] + state["_filename"] = self.file.name + state["_filepos"] = self.file.tell() return state - -def _copy_file_to_bytes_stream(f) -> BytesIO: - # Copy bytes from file object to BytesIO stream. - # The returned stream also records the original file `name` and current position. - pos = f.tell() - f.seek(0) - buf = BytesIO(f.read()) - f.seek(pos) - buf.seek(pos) - if hasattr(f, "name"): - buf.name = f.name - return buf + def __setstate__(self, state): + if "file" not in state: + self.file = open(state.pop("_filename"), "rb") + self.file.seek(state.pop("_filepos")) + self.__dict__.update(state) # default compression level for WOFF 1.0 tables and metadata From a6612aa6b5ab2cf998522b331dd478e905ceeeff Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Mon, 18 May 2020 13:16:05 +0100 Subject: [PATCH 4/6] varLib_test: close TTFont otherwise shutil.rmtree fails on Win https://ci.appveyor.com/project/fonttools/fonttools/builds/32948704/job/hc91qjisv4nv603m#L400 --- Tests/varLib/varLib_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Tests/varLib/varLib_test.py b/Tests/varLib/varLib_test.py index 5d36d6871..d27fe163e 100644 --- a/Tests/varLib/varLib_test.py +++ b/Tests/varLib/varLib_test.py @@ -22,6 +22,7 @@ def reload_font(font): """(De)serialize to get final binary layout.""" buf = BytesIO() font.save(buf) + font.close() buf.seek(0) return TTFont(buf) From cbe84da8c351daee2bb5d6ed803912f0c8266aba Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Tue, 19 May 2020 10:57:09 +0100 Subject: [PATCH 5/6] varLib_test: comment why font.close() is needed --- Tests/varLib/varLib_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tests/varLib/varLib_test.py b/Tests/varLib/varLib_test.py index d27fe163e..42e04bd9c 100644 --- a/Tests/varLib/varLib_test.py +++ b/Tests/varLib/varLib_test.py @@ -22,6 +22,8 @@ def reload_font(font): """(De)serialize to get final binary layout.""" buf = BytesIO() font.save(buf) + # Close the font to release filesystem resources so that on Windows the tearDown + # method can successfully remove the temporary directory created during setUp. font.close() buf.seek(0) return TTFont(buf) From cdd10373f0c0007d2e89044675337082e3ec509d Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Tue, 19 May 2020 13:19:01 +0100 Subject: [PATCH 6/6] sfnt_test: add test for deepcopy and pickle --- Tests/ttLib/sfnt_test.py | 56 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/Tests/ttLib/sfnt_test.py b/Tests/ttLib/sfnt_test.py index 917348332..9f8174446 100644 --- a/Tests/ttLib/sfnt_test.py +++ b/Tests/ttLib/sfnt_test.py @@ -1,7 +1,59 @@ -from fontTools.misc.py23 import * -from fontTools.ttLib.sfnt import calcChecksum +import io +import copy +import pickle +from fontTools.ttLib.sfnt import calcChecksum, SFNTReader +import pytest def test_calcChecksum(): assert calcChecksum(b"abcd") == 1633837924 assert calcChecksum(b"abcdxyz") == 3655064932 + + +EMPTY_SFNT = b"\x00\x01\x00\x00" + b"\x00" * 8 + + +def pickle_unpickle(obj): + return pickle.loads(pickle.dumps(obj)) + + +class SFNTReaderTest: + @pytest.mark.parametrize("deepcopy", [copy.deepcopy, pickle_unpickle]) + def test_pickle_protocol_FileIO(self, deepcopy, tmp_path): + fontfile = tmp_path / "test.ttf" + fontfile.write_bytes(EMPTY_SFNT) + reader = SFNTReader(fontfile.open("rb")) + + reader2 = deepcopy(reader) + + assert reader2 is not reader + assert reader2.file is not reader.file + + assert isinstance(reader2.file, io.BufferedReader) + assert isinstance(reader2.file.raw, io.FileIO) + assert reader2.file.name == reader.file.name + assert reader2.file.tell() == reader.file.tell() + + for k, v in reader.__dict__.items(): + if k == "file": + continue + assert getattr(reader2, k) == v + + @pytest.mark.parametrize("deepcopy", [copy.deepcopy, pickle_unpickle]) + def test_pickle_protocol_BytesIO(self, deepcopy, tmp_path): + buf = io.BytesIO(EMPTY_SFNT) + reader = SFNTReader(buf) + + reader2 = deepcopy(reader) + + assert reader2 is not reader + assert reader2.file is not reader.file + + assert isinstance(reader2.file, io.BytesIO) + assert reader2.file.tell() == reader.file.tell() + assert reader2.file.getvalue() == reader.file.getvalue() + + for k, v in reader.__dict__.items(): + if k == "file": + continue + assert getattr(reader2, k) == v