2013-11-27 14:36:57 -05:00
|
|
|
"""Python 2/3 compat layer."""
|
|
|
|
|
2014-01-14 15:07:50 +08:00
|
|
|
from __future__ import print_function, division, absolute_import
|
2015-05-08 19:28:42 +01:00
|
|
|
import sys
|
2013-11-27 17:27:45 -05:00
|
|
|
|
2016-04-07 09:21:05 +01:00
|
|
|
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = [
    'basestring',
    'unicode',
    'unichr',
    'byteord',
    'bytechr',
    'BytesIO',
    'StringIO',
    'UnicodeIO',
    'strjoin',
    'bytesjoin',
    'tobytes',
    'tostr',
    'tounicode',
    'Tag',
    'open',
    'range',
    'xrange',
]
|
2016-04-07 09:21:05 +01:00
|
|
|
|
|
|
|
|
2013-11-27 14:36:57 -05:00
|
|
|
try:
    # Python 2: 'basestring' is a built-in.  It is re-bound to the very same
    # name on purpose, so that an explicit `from <this module> import
    # basestring` finds a module attribute instead of raising ImportError.
    basestring = basestring
except NameError:
    # Python 3: there is no 'basestring' built-in; fall back to 'str'.
    basestring = str
|
|
|
|
|
|
|
|
try:
    # Python 2: 'unicode' is a built-in.  It is re-bound to the very same
    # name on purpose, so that an explicit `from <this module> import unicode`
    # finds a module attribute instead of raising ImportError.
    unicode = unicode
except NameError:
    # Python 3: there is no 'unicode' built-in; fall back to 'str'.
    unicode = str
|
|
|
|
|
|
|
|
try:
    # Python 2: 'unichr' is a built-in.  It is re-bound to the very same name
    # on purpose, so that an explicit `from <this module> import unichr`
    # finds a module attribute instead of raising ImportError.
    unichr = unichr
except NameError:
    # Python 3: there is no 'unichr' built-in; 'chr' is used in its place.
    unichr = chr

    def bytechr(n):
        """Return a bytes object of length 1 holding the integer 'n'."""
        return bytes([n])

    def byteord(c):
        """Return 'c' unchanged if it is already an int, else 'ord(c)'."""
        return c if isinstance(c, int) else ord(c)
else:
    # Only the lookup above is guarded by the 'try': a NameError raised while
    # setting up the Python 2 helpers below must not be mistaken for
    # "running on Python 3".
    if sys.maxunicode < 0x10FFFF:
        # workarounds for Python 2 "narrow" builds with UCS2-only support.

        _narrow_unichr = unichr

        def unichr(i):
            """
            Return the unicode character whose Unicode code is the integer 'i'.
            The valid range is 0 to 0x10FFFF inclusive.

            >>> _narrow_unichr(0xFFFF + 1)
            Traceback (most recent call last):
              File "<stdin>", line 1, in ?
            ValueError: unichr() arg not in range(0x10000) (narrow Python build)
            >>> unichr(0xFFFF + 1) == u'\U00010000'
            True
            >>> unichr(1114111) == u'\U0010FFFF'
            True
            >>> unichr(0x10FFFF + 1)
            Traceback (most recent call last):
              File "<stdin>", line 1, in ?
            ValueError: unichr() arg not in range(0x110000)
            """
            try:
                return _narrow_unichr(i)
            except ValueError:
                try:
                    # '%08x' formatting is used rather than hex(): on
                    # Python 2, hex() of a 'long' ends with an 'L' suffix,
                    # which would corrupt the escape sequence.
                    escape_str = "\\U%08x" % i
                    return escape_str.decode("unicode-escape")
                except UnicodeDecodeError:
                    raise ValueError('unichr() arg not in range(0x110000)')

        import re
        # Anchored at the end: the encoded text must be exactly one
        # '\UXXXXXXXX' escape, with nothing following it.
        _unicode_escape_RE = re.compile(r'\\U[A-Fa-f0-9]{8}\Z')

        def byteord(c):
            """
            Given a 8-bit or unicode character, return an integer representing the
            Unicode code point of the character. If a unicode argument is given, the
            character's code point must be in the range 0 to 0x10FFFF inclusive.

            >>> ord(u'\U00010000')
            Traceback (most recent call last):
              File "<stdin>", line 1, in ?
            TypeError: ord() expected a character, but string of length 2 found
            >>> byteord(u'\U00010000') == 0xFFFF + 1
            True
            >>> byteord(u'\U0010FFFF') == 1114111
            True
            """
            try:
                return ord(c)
            except TypeError as e:
                # 'ord' rejected the argument; accept it only if it encodes
                # to a single '\UXXXXXXXX' escape.  Every other failure is
                # reported as a TypeError wrapping the original error.
                try:
                    escape_str = c.encode('unicode-escape')
                except Exception:
                    raise TypeError(e)
                if not _unicode_escape_RE.match(escape_str):
                    raise TypeError(e)
                # skip the leading '\U' and parse the 8 hex digits
                return int(escape_str[2:], 16)

    else:
        byteord = ord
    bytechr = chr
|
2013-11-27 14:36:57 -05:00
|
|
|
|
2015-08-07 15:44:58 +01:00
|
|
|
|
|
|
|
# the 'io' module provides the same I/O interface on both 2 and 3.
|
|
|
|
# here we define an alias of io.StringIO to disambiguate it eternally...
|
|
|
|
from io import BytesIO
|
|
|
|
from io import StringIO as UnicodeIO
|
2013-11-27 14:36:57 -05:00
|
|
|
try:
|
2015-08-07 15:44:58 +01:00
|
|
|
# in python 2, by 'StringIO' we still mean a stream of *byte* strings
|
2013-12-04 04:11:06 -05:00
|
|
|
from StringIO import StringIO
|
2013-11-27 14:36:57 -05:00
|
|
|
except ImportError:
|
2015-08-07 15:44:58 +01:00
|
|
|
# in Python 3, we mean instead a stream of *unicode* strings
|
|
|
|
StringIO = UnicodeIO
|
|
|
|
|
2013-11-27 16:44:53 -05:00
|
|
|
|
2015-04-14 19:07:34 -07:00
|
|
|
def strjoin(iterable, joiner=''):
    """Concatenate the items of 'iterable', separated by 'joiner'.

    'joiner' is converted with 'tostr' before it is used as the separator;
    the items themselves are passed to 'join' as they are.
    """
    separator = tostr(joiner)
    return separator.join(iterable)
|
2014-07-21 13:19:53 -04:00
|
|
|
|
2015-04-16 17:09:49 -07:00
|
|
|
def tobytes(s, encoding='ascii', errors='strict'):
    """Return 's' as a bytes object.

    A value that is already 'bytes' is returned unchanged; anything else is
    encoded by calling its 'encode' method with 'encoding' and 'errors'.
    """
    if isinstance(s, bytes):
        return s
    return s.encode(encoding, errors)
|
2015-04-16 17:09:49 -07:00
|
|
|
def tounicode(s, encoding='ascii', errors='strict'):
    """Return 's' as a unicode string.

    A value that is already an instance of this module's 'unicode' type is
    returned unchanged; anything else is decoded by calling its 'decode'
    method with 'encoding' and 'errors'.
    """
    if isinstance(s, unicode):
        return s
    return s.decode(encoding, errors)
|
|
|
|
|
2013-11-27 16:44:53 -05:00
|
|
|
if str == bytes:
    # Python 2: the native 'str' type is a byte string.

    class Tag(str):
        """A 'str' subclass that can return itself as a byte string."""

        def tobytes(self):
            """Return the tag as bytes (latin-1 encoded if it is not bytes)."""
            if isinstance(self, bytes):
                return self
            else:
                return self.encode('latin1')

    tostr = tobytes

    bytesjoin = strjoin
else:
    # Python 3: the native 'str' type is a unicode string.

    class Tag(str):
        """A 'str' subclass that can be built from, and compared with, bytes.

        Byte strings are decoded as latin-1 both when constructing a Tag and
        when comparing one for (in)equality.
        """

        @staticmethod
        def transcode(blob):
            """Return 'blob' as 'str', decoding it as latin-1 if needed."""
            if not isinstance(blob, str):
                blob = blob.decode('latin-1')
            return blob

        def __new__(cls, content):
            return str.__new__(cls, cls.transcode(content))

        def __ne__(self, other):
            result = self.__eq__(other)
            # Pass NotImplemented through untouched: negating it would turn
            # "cannot compare" into a definite answer.
            if result is NotImplemented:
                return result
            return not result

        def __eq__(self, other):
            if not isinstance(other, str):
                if not hasattr(other, 'decode'):
                    # Neither text nor something decodable: let Python fall
                    # back to its default comparison instead of raising
                    # AttributeError from 'transcode'.
                    return NotImplemented
                other = self.transcode(other)
            return str.__eq__(self, other)

        def __hash__(self):
            # Defining __eq__ disables the inherited hash on Python 3;
            # restore the 'str' one explicitly.
            return str.__hash__(self)

        def tobytes(self):
            """Return the tag encoded as latin-1 bytes."""
            return self.encode('latin-1')

    tostr = tounicode

    def bytesjoin(iterable, joiner=b''):
        """Join the items of 'iterable' into one bytes object.

        'joiner' and every item are converted with 'tobytes' first.
        """
        return tobytes(joiner).join(tobytes(item) for item in iterable)
|
2015-05-08 19:28:42 +01:00
|
|
|
|
|
|
|
|
2015-12-11 17:21:35 +00:00
|
|
|
import os
|
|
|
|
import io as _io
|
|
|
|
|
|
|
|
try:
|
|
|
|
from msvcrt import setmode as _setmode
|
|
|
|
except ImportError:
|
|
|
|
_setmode = None # only available on the Windows platform
|
|
|
|
|
|
|
|
|
|
|
|
def open(file, mode='r', buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):
    r""" Wrapper around `io.open` that bridges the differences between Python 2
    and Python 3's built-in `open` functions. In Python 2, `io.open` is a
    backport of Python 3's `open`, whereas in Python 3, it is an alias of the
    built-in `open` function.

    One difference is that the 'opener' keyword argument is only supported in
    Python 3. Here we pass the value of 'opener' only when it is not None.
    Passing it causes Python 2 to raise TypeError, complaining about the
    number of expected arguments, so it must be avoided in py2 or py2-3
    contexts.

    Another difference between 2 and 3, this time on Windows, has to do with
    opening files by name or by file descriptor.

    On the Windows C runtime, the 'O_BINARY' flag is defined which disables
    the newlines translation ('\r\n' <=> '\n') when reading/writing files.
    On both Python 2 and 3 this flag is always set when opening files by name.
    This way, the newlines translation at the MSVCRT level doesn't interfere
    with the Python io module's own newlines translation.

    However, when opening files via fd, on Python 2 the fd is simply copied,
    regardless of whether it has the 'O_BINARY' flag set or not.
    This becomes a problem in the case of stdout, stdin, and stderr, because on
    Windows these are opened in text mode by default (i.e. don't have the
    O_BINARY flag set).

    On Python 3, this issue has been fixed, and all fds are now opened in
    binary mode on Windows, including standard streams. Similarly here, the
    `_setmode` function is used to ensure that integer file descriptors are
    O_BINARY'ed before they are passed on to io.open.

    For more info, see: https://bugs.python.org/issue10841
    """
    if isinstance(file, int):
        # the 'file' argument is an integer file descriptor
        fd = file
        if fd < 0:
            raise ValueError('negative file descriptor')
        if _setmode:
            # `_setmode` sets the line-end translation and returns the value
            # of the previous mode.  There seems to be no `_getmode`, so the
            # only way to learn whether the binary bit was already set is to
            # duplicate the descriptor, set the flag on the duplicate and
            # inspect the value that comes back.
            duplicate = os.dup(fd)
            previous_mode = _setmode(duplicate, os.O_BINARY)
            if previous_mode & os.O_BINARY:
                # the original descriptor was already binary: drop the copy
                os.close(duplicate)
            else:
                # binary mode was not set: hand the duplicate to io.open
                file = duplicate
                if closefd:
                    # the caller gave up ownership of the original descriptor
                    os.close(fd)
                else:
                    # make sure the duplicate is closed with the file object
                    closefd = True

    if opener is None:
        return _io.open(
            file, mode, buffering, encoding, errors, newline, closefd)
    # "opener" is not supported on Python 2, use it at your own risk!
    return _io.open(
        file, mode, buffering, encoding, errors, newline, closefd,
        opener=opener)
|
|
|
|
|
|
|
|
|
2016-05-04 17:11:54 +01:00
|
|
|
# always use iterator whether one uses 'range' (py3) or 'xrange' (py2)
|
|
|
|
try:
    # Python 2: expose the lazy 'xrange' built-in under both names.
    xrange = range = xrange
except NameError:
    # Python 3: 'xrange' does not exist; the built-in 'range' serves as both.
    xrange = range = range
|
|
|
|
|
|
|
|
|
2016-02-01 13:10:42 +00:00
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
|
|
class _Logger(logging.Logger):
    """ Logger that falls back to 'logging.lastResort' (introduced in
    Python 3.2) when no handler is found for a record.
    """

    def callHandlers(self, record):
        # this is the same as Python 3.5's logging.Logger.callHandlers
        handler_count = 0
        current = self
        while current:
            for handler in current.handlers:
                handler_count += 1
                if record.levelno >= handler.level:
                    handler.handle(record)
            # walk up to the parent unless propagation is switched off
            current = current.parent if current.propagate else None

        if handler_count:
            return

        # no handler anywhere in the chain
        last_resort = logging.lastResort
        if last_resort:
            if record.levelno >= last_resort.level:
                last_resort.handle(record)
        elif (logging.raiseExceptions
                and not self.manager.emittedNoHandlerWarning):
            sys.stderr.write(
                'No handlers could be found for logger "%s"\n' % self.name)
            self.manager.emittedNoHandlerWarning = True
|
|
|
|
|
|
|
|
|
|
|
|
class _StderrHandler(logging.StreamHandler):
    """ StreamHandler bound to sys.stderr that looks the stream up on every
    access, so it always writes to whatever sys.stderr currently is rather
    than to the object it referred to when the handler was constructed.
    """

    def __init__(self, level=logging.NOTSET):
        """
        Initialize the handler with the given level.
        """
        # Only the base Handler initializer is called here; 'stream' is
        # provided by the property below.
        logging.Handler.__init__(self, level)

    # resolved on each access instead of being stored on the instance
    stream = property(lambda self: sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
if not hasattr(logging, 'lastResort'):
    # Python pre-3.2 has no "last resort" handler, i.e. the one used when
    # clients don't explicitly configure logging (Python 3.2 and above
    # already define it).  The handler prints the bare message to
    # sys.stderr, only for events of severity WARNING or greater.
    # To obtain the pre-3.2 behaviour, set logging.lastResort to None.
    # https://docs.python.org/3.5/howto/logging.html#what-happens-if-no-configuration-is-provided
    logging.lastResort = _StderrHandler(logging.WARNING)
    # The Logger class must also be one that knows about the last resort
    # handler.  Loggers instantiated after this call use the custom class;
    # those that already exist, like the 'root' logger, do not.
    logging.setLoggerClass(_Logger)
|
|
|
|
|
|
|
|
|
2015-05-08 19:28:42 +01:00
|
|
|
if __name__ == "__main__":
    # Run this module's doctests; the exit status is the number of failures.
    import doctest
    import sys
    sys.exit(doctest.testmod().failed)
|