allow an encoding to be specified when parsing a Type 1 font

2018-04-12 14:39:44 +02:00 · 2018-04-12 14:39:44 +02:00 · 52ea50f4bc
commit 52ea50f4bc
parent 0d474737d7
2 changed files with 11 additions and 8 deletions
--- a/Lib/fontTools/misc/psLib.py
+++ b/Lib/fontTools/misc/psLib.py
@@ -43,13 +43,14 @@ class PSError(Exception): pass

 class PSTokenizer(object):

-	def __init__(self, buf=b''):
+	def __init__(self, buf=b'', encoding="ascii"):
 		# Force self.buf to be a byte string
 		buf = tobytes(buf)
 		self.buf = buf
 		self.len = len(buf)
 		self.pos = 0
 		self.closed = False
+		self.encoding = encoding

 	def read(self, n=-1):
 		"""Read at most 'n' bytes from the buffer, or less if the read
@@ -122,7 +123,7 @@ class PSTokenizer(object):
 			_, nextpos = m.span()
 			token = buf[pos:nextpos]
 		self.pos = pos + len(token)
-		token = tostr(token, encoding='ascii')
+		token = tostr(token, encoding=self.encoding)
 		return tokentype, token

 	def skipwhite(self, whitematch=skipwhiteRE.match):
@@ -145,9 +146,10 @@ class PSTokenizer(object):

 class PSInterpreter(PSOperators):

-	def __init__(self):
+	def __init__(self, encoding="ascii"):
 		systemdict = {}
 		userdict = {}
+		self.encoding = encoding
 		self.dictstack = [systemdict, userdict]
 		self.stack = []
 		self.proclevel = 0
@@ -174,7 +176,7 @@ class PSInterpreter(PSOperators):
 			self.suckoperators(systemdict, baseclass)

 	def interpret(self, data, getattr=getattr):
-		tokenizer = self.tokenizer = PSTokenizer(data)
+		tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
 		getnexttoken = tokenizer.getnexttoken
 		do_token = self.do_token
 		handle_object = self.handle_object
@@ -345,13 +347,13 @@ def unpack_item(item):
 		newitem = item.value
 	return newitem

-def suckfont(data):
+def suckfont(data, encoding="ascii"):
 	m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data)
 	if m:
 		fontName = m.group(1)
 	else:
 		fontName = None
-	interpreter = PSInterpreter()
+	interpreter = PSInterpreter(encoding=encoding)
 	interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
 	interpreter.interpret(data)
 	fontdir = interpreter.dictstack[0]['FontDirectory'].value
--- a/Lib/fontTools/t1Lib/__init__.py
+++ b/Lib/fontTools/t1Lib/__init__.py
@@ -49,11 +49,12 @@ class T1Font(object):
 	Type 1 fonts.
 	"""

-	def __init__(self, path=None):
+	def __init__(self, path=None, encoding="ascii"):
 		if path is not None:
 			self.data, type = read(path)
 		else:
 			pass # XXX
+		self.encoding = encoding

 	def saveAs(self, path, type, dohex=False):
 		write(path, self.getData(), type, dohex)
@@ -82,7 +83,7 @@ class T1Font(object):
 	def parse(self):
 		from fontTools.misc import psLib
 		from fontTools.misc import psCharStrings
-		self.font = psLib.suckfont(self.data)
+		self.font = psLib.suckfont(self.data, self.encoding)
 		charStrings = self.font["CharStrings"]
 		lenIV = self.font["Private"].get("lenIV", 4)
 		assert lenIV >= 0