diff --git a/Lib/fontTools/unicodedata/__init__.py b/Lib/fontTools/unicodedata/__init__.py
index 31d7ef885..689f04e73 100644
--- a/Lib/fontTools/unicodedata/__init__.py
+++ b/Lib/fontTools/unicodedata/__init__.py
@@ -38,6 +38,7 @@ __all__ = [tostr(s) for s in (
     "script_extension",
     "script_name",
     "script_code",
+    "script_horizontal_direction",
     "ot_tags_from_script",
 )]
 
@@ -134,6 +135,81 @@ def script_code(script_name, default=KeyError):
         return default
 
 
+# The data on script direction is taken from harfbuzz's "hb-common.cc":
+# https://goo.gl/X5FDXC
+# It matches the CLDR "scriptMetadata.txt" as of January 2018:
+# http://unicode.org/repos/cldr/trunk/common/properties/scriptMetadata.txt
+RTL_SCRIPTS = {
+    # Unicode-1.1 additions
+    'Arab',  # Arabic
+    'Hebr',  # Hebrew
+
+    # Unicode-3.0 additions
+    'Syrc',  # Syriac
+    'Thaa',  # Thaana
+
+    # Unicode-4.0 additions
+    'Cprt',  # Cypriot
+
+    # Unicode-4.1 additions
+    'Khar',  # Kharoshthi
+
+    # Unicode-5.0 additions
+    'Phnx',  # Phoenician
+    'Nkoo',  # Nko
+
+    # Unicode-5.1 additions
+    'Lydi',  # Lydian
+
+    # Unicode-5.2 additions
+    'Avst',  # Avestan
+    'Armi',  # Imperial Aramaic
+    'Phli',  # Inscriptional Pahlavi
+    'Prti',  # Inscriptional Parthian
+    'Sarb',  # Old South Arabian
+    'Orkh',  # Old Turkic
+    'Samr',  # Samaritan
+
+    # Unicode-6.0 additions
+    'Mand',  # Mandaic
+
+    # Unicode-6.1 additions
+    'Merc',  # Meroitic Cursive
+    'Mero',  # Meroitic Hieroglyphs
+
+    # Unicode-7.0 additions
+    'Mani',  # Manichaean
+    'Mend',  # Mende Kikakui
+    'Nbat',  # Nabataean
+    'Narb',  # Old North Arabian
+    'Palm',  # Palmyrene
+    'Phlp',  # Psalter Pahlavi
+
+    # Unicode-8.0 additions
+    'Hatr',  # Hatran
+    'Hung',  # Old Hungarian
+
+    # Unicode-9.0 additions
+    'Adlm',  # Adlam
+}
+
+
+def script_horizontal_direction(script_code, default=KeyError):
+    """ Return "RTL" for scripts that contain right-to-left characters
+    according to the Bidi_Class property. Otherwise return "LTR".
+
+    'script_code' is a script code such as "Latn" or "Arab". If it is not
+    found in Scripts.NAMES, raise 'default' when that is a KeyError class
+    (the default behavior); otherwise return 'default' itself.
+    """
+    if script_code not in Scripts.NAMES:
+        # Unknown code: raise if 'default' is a KeyError (sub)class.
+        if isinstance(default, type) and issubclass(default, KeyError):
+            raise default(script_code)
+        return default
+    return str("RTL") if script_code in RTL_SCRIPTS else str("LTR")
+
+
 def block(char):
     """ Return the block property assigned to the Unicode character 'char'
     as a string.
diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py
index 094ea8e6d..0d0f2bf36 100644
--- a/Tests/unicodedata_test.py
+++ b/Tests/unicodedata_test.py
@@ -218,6 +218,19 @@ def test_ot_tags_from_script():
     assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]
 
 
+def test_script_horizontal_direction():
+    assert unicodedata.script_horizontal_direction("Latn") == "LTR"
+    assert unicodedata.script_horizontal_direction("Arab") == "RTL"
+    assert unicodedata.script_horizontal_direction("Thaa") == "RTL"
+
+    with pytest.raises(KeyError):
+        unicodedata.script_horizontal_direction("Azzz")
+    assert unicodedata.script_horizontal_direction("Azzz",
+                                                   default="LTR") == "LTR"
+    assert unicodedata.script_horizontal_direction("Azzz",
+                                                   default=None) is None
+
+
 if __name__ == "__main__":
     import sys
     sys.exit(pytest.main(sys.argv))