unicodedata: add ot_tag_to_script function
Returns the Unicode script code for a given OpenType script tag, or None if no match is found.
This commit is contained in:
parent
29deb7e6fb
commit
677954d5b9
@ -35,3 +35,7 @@ NEW_SCRIPT_TAGS = {
|
||||
"Telu": ("tel2",),
|
||||
"Mymr": ("mym2",),
|
||||
}
|
||||
|
||||
# Inverse of NEW_SCRIPT_TAGS: each OpenType tag listed there points back to
# the script code it was listed under.
NEW_SCRIPT_TAGS_REVERSED = {
    ot_tag: script
    for script, ot_tags in NEW_SCRIPT_TAGS.items()
    for ot_tag in ot_tags
}
|
||||
|
@ -40,6 +40,7 @@ __all__ = [tostr(s) for s in (
|
||||
"script_code",
|
||||
"script_horizontal_direction",
|
||||
"ot_tags_from_script",
|
||||
"ot_tag_to_script",
|
||||
)]
|
||||
|
||||
|
||||
@ -239,3 +240,37 @@ def ot_tags_from_script(script_code):
|
||||
script_tags.reverse() # last in, first out
|
||||
|
||||
return script_tags
|
||||
|
||||
|
||||
def ot_tag_to_script(tag):
    """Map an OpenType script tag to a Unicode script code.

    The tag is converted with ``tostr``, stripped of surrounding whitespace
    and right-padded with spaces to four characters before the lookup.

    Returns None for the "DFLT" tag, or when the resulting code is not one of
    the known script names.

    Raises ValueError if the stripped tag is empty, longer than four
    characters, or contains a space.
    """
    tag = tostr(tag).strip()
    if len(tag) == 0 or len(tag) > 4 or " " in tag:
        raise ValueError("invalid OpenType tag: %r" % tag)

    # Pad short tags with trailing spaces up to four characters.
    tag = tag.ljust(4)

    if tag == OTTags.DEFAULT_SCRIPT:
        # It's unclear which Unicode script the "DFLT" OpenType tag maps to,
        # so None is returned for it.
        return None

    # Tags listed in the reverse table are resolved by direct lookup.
    reversed_new_tags = OTTags.NEW_SCRIPT_TAGS_REVERSED
    if tag in reversed_new_tags:
        return reversed_new_tags[tag]

    # Everything else is derived algorithmically: uppercase the first
    # character, then replace each trailing space with the character that
    # precedes it in the result, e.g. 'nko ' -> 'Nkoo'.
    script_code = tag[0].upper() + tag[1]
    for position, char in enumerate(tag[2:], start=2):
        script_code += script_code[position - 1] if char == " " else char

    return script_code if script_code in Scripts.NAMES else None
|
||||
|
@ -218,6 +218,26 @@ def test_ot_tags_from_script():
|
||||
assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]
|
||||
|
||||
|
||||
def test_ot_tag_to_script():
    """Exercise ot_tag_to_script with plain, variant, short and invalid tags."""
    assert unicodedata.ot_tag_to_script("latn") == "Latn"
    assert unicodedata.ot_tag_to_script("kana") == "Kana"

    # None is a singleton, so compare by identity rather than equality.
    assert unicodedata.ot_tag_to_script("DFLT") is None
    assert unicodedata.ot_tag_to_script("aaaa") is None

    # A plain tag and its numbered variant resolve to the same script.
    assert unicodedata.ot_tag_to_script("beng") == "Beng"
    assert unicodedata.ot_tag_to_script("bng2") == "Beng"
    assert unicodedata.ot_tag_to_script("dev2") == "Deva"
    assert unicodedata.ot_tag_to_script("gjr2") == "Gujr"

    # Tags shorter than four letters: the last letter is repeated.
    assert unicodedata.ot_tag_to_script("yi ") == "Yiii"
    assert unicodedata.ot_tag_to_script("nko ") == "Nkoo"
    assert unicodedata.ot_tag_to_script("vai ") == "Vaii"
    assert unicodedata.ot_tag_to_script("lao ") == "Laoo"
    assert unicodedata.ot_tag_to_script("yi") == "Yiii"

    # Empty, blank, space-containing and over-long tags are rejected.
    for invalid_value in ("", " ", "z zz", "zzzzz"):
        with pytest.raises(ValueError, match="invalid OpenType tag"):
            unicodedata.ot_tag_to_script(invalid_value)
|
||||
|
||||
|
||||
def test_script_horizontal_direction():
|
||||
assert unicodedata.script_horizontal_direction("Latn") == "LTR"
|
||||
assert unicodedata.script_horizontal_direction("Arab") == "RTL"
|
||||
|
Loading…
x
Reference in New Issue
Block a user