[unicodedata] use bisect.bisect_right function
CPython ships with a fast C implementation of the bisect module. Using it gives a 4- to 5-fold speed-up over my pure-Python version.
This commit is contained in:
parent
5b3c189f6d
commit
3442da1529
@ -2,7 +2,7 @@ from __future__ import (
|
||||
print_function, division, absolute_import, unicode_literals)
|
||||
from fontTools.misc.py23 import *
|
||||
|
||||
import functools
|
||||
from bisect import bisect_right
|
||||
|
||||
try:
|
||||
# use unicodedata backport compatible with python2:
|
||||
@ -12,7 +12,7 @@ except ImportError:
|
||||
# fall back to built-in unicodedata (possibly outdated)
|
||||
from unicodedata import *
|
||||
|
||||
from .scripts import SCRIPT_RANGES
|
||||
from .scripts import SCRIPT_RANGES, SCRIPT_NAMES
|
||||
|
||||
|
||||
__all__ = [
|
||||
@ -36,36 +36,16 @@ __all__ = [
|
||||
]
|
||||
|
||||
|
||||
def _memoize(func):
|
||||
# Decorator that caches a function's return value each time it is
|
||||
# called, and returns the cached value if called later with the same
|
||||
# argument.
|
||||
cache = func.cache = {}
|
||||
|
||||
@functools.wraps(func)
|
||||
def wrapper(arg):
|
||||
if arg not in cache:
|
||||
cache[arg] = func(arg)
|
||||
return cache[arg]
|
||||
return wrapper
|
||||
|
||||
|
||||
@_memoize
|
||||
def script(char):
|
||||
"""For the unicode character 'char' return the script name."""
|
||||
code = byteord(char)
|
||||
return _binary_search_range(code, SCRIPT_RANGES, default="Unknown")
|
||||
|
||||
|
||||
def _binary_search_range(code, ranges, default=None):
|
||||
left = 0
|
||||
right = len(ranges) - 1
|
||||
while right >= left:
|
||||
mid = (left + right) >> 1
|
||||
if code < ranges[mid][0]:
|
||||
right = mid - 1
|
||||
elif code > ranges[mid][1]:
|
||||
left = mid + 1
|
||||
else:
|
||||
return ranges[mid][2]
|
||||
return default
|
||||
# 'bisect_right(a, x, lo=0, hi=len(a))' returns an insertion point which
|
||||
# comes after (to the right of) any existing entries of x in a, and it
|
||||
# partitions array a into two halves so that, for the left side
|
||||
# all(val <= x for val in a[lo:i]), and for the right side
|
||||
# all(val > x for val in a[i:hi]).
|
||||
# Our 'SCRIPT_RANGES' is a sorted list of ranges (only their starting
|
||||
# breakpoints); we want to use `bisect_right` to look up the range that
|
||||
# contains the given codepoint: i.e. whose start is less than or equal
|
||||
# to the codepoint. Thus, we subtract 1 from the index returned.
|
||||
i = bisect_right(SCRIPT_RANGES, code)
|
||||
return SCRIPT_NAMES[i-1]
|
||||
|
Loading…
x
Reference in New Issue
Block a user