[unicodedata] add script_code func and 'default' fallback arg
`script_code` does the reverse of `script_name`: it takes a long script name and returns a 4-letter script code. Both `script_name` and `script_code` raise KeyError by default, but can optionally return a default value instead.
This commit is contained in:
parent
afd2490a6c
commit
99ea0a3986
@ -2,6 +2,7 @@ from __future__ import (
|
|||||||
print_function, division, absolute_import, unicode_literals)
|
print_function, division, absolute_import, unicode_literals)
|
||||||
from fontTools.misc.py23 import *
|
from fontTools.misc.py23 import *
|
||||||
|
|
||||||
|
import re
|
||||||
from bisect import bisect_right
|
from bisect import bisect_right
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -84,13 +85,50 @@ def script_extension(char):
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
def script_name(code, default=KeyError):
    """Look up the long, human-readable script name for a four-letter
    Unicode script code.

    Underscores in the stored name are replaced by spaces before it is
    returned.

    When the code has no matching name, a KeyError is raised by default.
    Pass a fallback through the 'default' argument (e.g. 'Unknown' or
    None) to get that value back instead of an exception.
    """
    try:
        long_name = Scripts.NAMES[code]
    except KeyError:
        # KeyError itself (or a subclass of it) acts as the "no fallback
        # given" marker; any other value is handed back to the caller.
        wants_error = isinstance(default, type) and issubclass(default, KeyError)
        if not wants_error:
            return default
        raise
    return str(long_name.replace("_", " "))
|
||||||
|
|
||||||
|
|
||||||
|
# Characters ignored by loose matching: hyphens, underscores and any
# whitespace (not only the plain space character, so tabs and newlines in a
# name are skipped as well).
_normalize_re = re.compile(r"[-_\s]+")


def _normalize_property_name(string):
    """Return 'string' lowercased, with whitespace, '-' and '_' removed.

    The result is used as a key for loose matching of names, so that e.g.
    'Egyptian Hieroglyphs', 'Egyptian_Hieroglyphs' and 'egyptian-hieroglyphs'
    all normalize to 'egyptianhieroglyphs'.
    """
    return _normalize_re.sub("", string).lower()
|
||||||
|
|
||||||
|
|
||||||
|
# Reverse lookup table built once at import time from Scripts.NAMES:
# normalized long script name -> script code.
_SCRIPT_CODES = dict(
    (_normalize_property_name(long_name), code)
    for code, long_name in Scripts.NAMES.items()
)
|
||||||
|
|
||||||
|
|
||||||
|
def script_code(script_name, default=KeyError):
    """Return the four-letter Unicode script code for a long script name.

    The name is passed through _normalize_property_name before the lookup,
    so matching is loose.

    When no script code matches, a KeyError is raised by default. Pass a
    fallback through the 'default' argument (e.g. 'Zzzz' or None) to get
    that value back instead of an exception.
    """
    lookup_key = _normalize_property_name(script_name)
    try:
        code = _SCRIPT_CODES[lookup_key]
    except KeyError:
        # KeyError itself (or a subclass of it) acts as the "no fallback
        # given" marker; any other value is handed back to the caller.
        wants_error = isinstance(default, type) and issubclass(default, KeyError)
        if not wants_error:
            return default
        raise
    return code
|
||||||
|
|
||||||
|
|
||||||
def block(char):
|
def block(char):
|
||||||
|
@ -175,8 +175,25 @@ def test_script_name():
|
|||||||
assert unicodedata.script_name("Zzzz") == "Unknown"
|
assert unicodedata.script_name("Zzzz") == "Unknown"
|
||||||
# underscores in long names are replaced by spaces
|
# underscores in long names are replaced by spaces
|
||||||
assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs"
|
assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs"
|
||||||
|
|
||||||
with pytest.raises(KeyError):
|
with pytest.raises(KeyError):
|
||||||
unicodedata.script_name("QQQQ")
|
unicodedata.script_name("QQQQ")
|
||||||
|
assert unicodedata.script_name("QQQQ", default="Unknown")
|
||||||
|
|
||||||
|
|
||||||
|
def test_script_code():
    """Check script_code lookups, loose name matching and the fallback."""
    exact_names = [
        ("Latin", "Latn"),
        ("Common", "Zyyy"),
        ("Unknown", "Zzzz"),
    ]
    for long_name, expected_code in exact_names:
        assert unicodedata.script_code(long_name) == expected_code

    # case, whitespace, underscores and hyphens are ignored
    spellings = [
        "Egyptian Hieroglyphs",
        "Egyptian_Hieroglyphs",
        "egyptianhieroglyphs",
        "Egyptian-Hieroglyphs",
    ]
    for spelling in spellings:
        assert unicodedata.script_code(spelling) == "Egyp"

    # an unmatched name raises unless a fallback value is supplied
    missing_name = "Does not exist"
    with pytest.raises(KeyError):
        unicodedata.script_code(missing_name)
    assert unicodedata.script_code(missing_name, default="Zzzz") == "Zzzz"
|
||||||
|
|
||||||
|
|
||||||
def test_block():
|
def test_block():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user