diff --git a/Lib/fontTools/unicodedata/__init__.py b/Lib/fontTools/unicodedata/__init__.py index e05edc20a..1592e98ab 100644 --- a/Lib/fontTools/unicodedata/__init__.py +++ b/Lib/fontTools/unicodedata/__init__.py @@ -2,6 +2,7 @@ from __future__ import ( print_function, division, absolute_import, unicode_literals) from fontTools.misc.py23 import * +import re from bisect import bisect_right try: @@ -84,13 +85,50 @@ def script_extension(char): return value -def script_name(code): +def script_name(code, default=KeyError): """ Return the long, human-readable script name given a four-letter Unicode script code. - Raises KeyError if no matching name is found. + If no matching name is found, a KeyError is raised by default. + + You can use the 'default' argument to return a fallback value (e.g. + 'Unknown' or None) instead of throwing an error. """ - return Scripts.NAMES[code].replace("_", " ") + try: + return str(Scripts.NAMES[code].replace("_", " ")) + except KeyError: + if isinstance(default, type) and issubclass(default, KeyError): + raise + return default + + +_normalize_re = re.compile(r"[-_ ]+") + + +def _normalize_property_name(string): + """Remove case, strip space, '-' and '_' for loose matching.""" + return _normalize_re.sub("", string).lower() + + +_SCRIPT_CODES = {_normalize_property_name(v): k + for k, v in Scripts.NAMES.items()} + + +def script_code(script_name, default=KeyError): + """Returns the four-letter Unicode script code from its long name + + If no matching script code is found, a KeyError is raised by default. + + You can use the 'default' argument to return a fallback string (e.g. + 'Zzzz' or None) instead of throwing an error. + """ + normalized_name = _normalize_property_name(script_name) + try: + return _SCRIPT_CODES[normalized_name] + except KeyError: + if isinstance(default, type) and issubclass(default, KeyError): + raise + return default def block(char): diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py index 677de5676..dba02e7c9 100644 --- a/Tests/unicodedata_test.py +++ b/Tests/unicodedata_test.py @@ -175,8 +175,25 @@ def test_script_name(): assert unicodedata.script_name("Zzzz") == "Unknown" # underscores in long names are replaced by spaces assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs" + with pytest.raises(KeyError): unicodedata.script_name("QQQQ") + assert unicodedata.script_name("QQQQ", default="Unknown") + + +def test_script_code(): + assert unicodedata.script_code("Latin") == "Latn" + assert unicodedata.script_code("Common") == "Zyyy" + assert unicodedata.script_code("Unknown") == "Zzzz" + # case, whitespace, underscores and hyphens are ignored + assert unicodedata.script_code("Egyptian Hieroglyphs") == "Egyp" + assert unicodedata.script_code("Egyptian_Hieroglyphs") == "Egyp" + assert unicodedata.script_code("egyptianhieroglyphs") == "Egyp" + assert unicodedata.script_code("Egyptian-Hieroglyphs") == "Egyp" + + with pytest.raises(KeyError): + unicodedata.script_code("Does not exist") + assert unicodedata.script_code("Does not exist", default="Zzzz") == "Zzzz" def test_block():