52 lines
1.5 KiB
Python
Raw Normal View History

from __future__ import (
print_function, division, absolute_import, unicode_literals)
from fontTools.misc.py23 import *
from bisect import bisect_right
try:
# use unicodedata backport compatible with python2:
# https://github.com/mikekap/unicodedata2
from unicodedata2 import *
except ImportError:
# fall back to built-in unicodedata (possibly outdated)
from unicodedata import *
from .scripts import SCRIPT_RANGES, SCRIPT_NAMES
__all__ = [
# names from built-in unicodedata module
"lookup",
"name",
"decimal",
"digit",
"numeric",
"category",
"bidirectional",
"combining",
"east_asian_width",
"mirrored",
"decomposition",
"normalize",
"unidata_version",
"ucd_3_2_0",
# additonal functions
"script",
]
def script(char):
code = byteord(char)
# 'bisect_right(a, x, lo=0, hi=len(a))' returns an insertion point which
# comes after (to the right of) any existing entries of x in a, and it
# partitions array a into two halves so that, for the left side
# all(val <= x for val in a[lo:i]), and for the right side
# all(val > x for val in a[i:hi]).
# Our 'SCRIPT_RANGES' is a sorted list of ranges (only their starting
# breakpoints); we want to use `bisect_right` to look up the range that
# contains the given codepoint: i.e. whose start is less than or equal
# to the codepoint. Thus, we subtract -1 from the index returned.
i = bisect_right(SCRIPT_RANGES, code)
return SCRIPT_NAMES[i-1]