[unicodedata] add script_code func and 'default' fallback arg

`script_code` does the reverse of `script_name`: it takes a long
script name and returns a 4-letter script code.

Both `script_name` and `script_code` raise KeyError by default,
but can optionally return a default value instead.
This commit is contained in:
Cosimo Lupo 2017-11-22 17:34:24 +01:00
parent afd2490a6c
commit 99ea0a3986
2 changed files with 58 additions and 3 deletions

View File

@ -2,6 +2,7 @@ from __future__ import (
print_function, division, absolute_import, unicode_literals)
from fontTools.misc.py23 import *
import re
from bisect import bisect_right
try:
@ -84,13 +85,50 @@ def script_extension(char):
return value
def script_name(code, default=KeyError):
    """Return the long, human-readable script name given a four-letter
    Unicode script code.

    Underscores in the stored long name are replaced by spaces.

    If no matching name is found, a KeyError is raised by default.
    You can use the 'default' argument to return a fallback value (e.g.
    'Unknown' or None) instead of throwing an error.
    """
    try:
        return str(Scripts.NAMES[code].replace("_", " "))
    except KeyError:
        # The KeyError class itself (or a subclass) is the "no fallback"
        # marker, which keeps None usable as a genuine default value.
        if isinstance(default, type) and issubclass(default, KeyError):
            raise
        return default
# Runs of whitespace, hyphens and underscores; all are ignored when
# comparing property names loosely.
_normalize_re = re.compile(r"[-_\s]+")


def _normalize_property_name(string):
    """Return 'string' lowercased, with whitespace, '-' and '_' removed.

    The result is meant to be used as a key for loose matching, so that
    e.g. "Egyptian Hieroglyphs", "Egyptian_Hieroglyphs" and
    "egyptian-hieroglyphs" all normalize to the same value.
    Any whitespace character is dropped, not only the plain space.
    """
    return _normalize_re.sub("", string).lower()
# Reverse of Scripts.NAMES: normalized long script name -> script code.
_SCRIPT_CODES = dict(
    (_normalize_property_name(long_name), code)
    for code, long_name in Scripts.NAMES.items()
)
def script_code(script_name, default=KeyError):
    """Return the four-letter Unicode script code for a long script name.

    The name is normalized with _normalize_property_name before being
    looked up, so the match is a loose one.

    When no script code matches, a KeyError is raised by default.
    Pass 'default' to get a fallback value back instead (e.g. 'Zzzz'
    or None).
    """
    lookup_key = _normalize_property_name(script_name)
    try:
        found = _SCRIPT_CODES[lookup_key]
    except KeyError:
        # Anything other than KeyError (or a subclass of it) is a real
        # fallback value; only the exception class means "re-raise".
        if not (isinstance(default, type) and issubclass(default, KeyError)):
            return default
        raise
    return found
def block(char):

View File

@ -175,8 +175,25 @@ def test_script_name():
assert unicodedata.script_name("Zzzz") == "Unknown"
# underscores in long names are replaced by spaces
assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs"
# an unrecognized code raises KeyError unless a fallback is supplied
with pytest.raises(KeyError):
    unicodedata.script_name("QQQQ")
# compare against the fallback itself: a bare truthiness check would
# pass for any non-empty return value
assert unicodedata.script_name("QQQQ", default="Unknown") == "Unknown"
def test_script_code():
    """Check script_code lookups, loose name matching and the fallback."""
    expected_codes = [
        ("Latin", "Latn"),
        ("Common", "Zyyy"),
        ("Unknown", "Zzzz"),
        # case, whitespace, underscores and hyphens are ignored
        ("Egyptian Hieroglyphs", "Egyp"),
        ("Egyptian_Hieroglyphs", "Egyp"),
        ("egyptianhieroglyphs", "Egyp"),
        ("Egyptian-Hieroglyphs", "Egyp"),
    ]
    for long_name, code in expected_codes:
        assert unicodedata.script_code(long_name) == code

    # an unrecognized name raises KeyError unless a fallback is supplied
    with pytest.raises(KeyError):
        unicodedata.script_code("Does not exist")
    fallback = unicodedata.script_code("Does not exist", default="Zzzz")
    assert fallback == "Zzzz"
def test_block():