[Tests] adjust unicodedata_test to expect short script codes

This commit is contained in:
Cosimo Lupo 2017-11-22 16:50:55 +01:00
parent 54fa00499e
commit 012688ac20
2 changed files with 150 additions and 150 deletions

View File

@ -39,15 +39,15 @@ __all__ = [
def script(char):
""" Return the script property assigned to the Unicode character 'char'
as string.
""" Return the four-letter script code assigned to the Unicode character
'char' as string.
>>> script("a")
'Latin'
'Latn'
>>> script(",")
'Common'
'Zyyy'
>>> script(unichr(0x10FFFF))
'Unknown'
'Zzzz'
"""
code = byteord(char)
# 'bisect_right(a, x, lo=0, hi=len(a))' returns an insertion point which
@ -67,11 +67,11 @@ def script_extension(char):
""" Return the script extension property assigned to the Unicode character
'char' as a set of string.
>>> script_extension("a") == {'Latin'}
>>> script_extension("a") == {'Latn'}
True
>>> script_extension(unichr(0x060C)) == {'Arab', 'Syrc', 'Thaa'}
True
>>> script_extension(unichr(0x10FFFF)) == {'Unknown'}
>>> script_extension(unichr(0x10FFFF)) == {'Zzzz'}
True
"""
code = byteord(char)

View File

@ -6,160 +6,160 @@ from fontTools import unicodedata
def test_script():
    """Check that ``unicodedata.script`` returns four-letter script codes.

    Each assertion passes a single character to ``unicodedata.script`` and
    compares the result with the expected short script code (for example
    ``'Latn'``, ``'Zyyy'``, ``'Zzzz'``), not the long script name.
    """
    # a few fixed cases, including code points expected to map to 'Zzzz'
    assert unicodedata.script("a") == "Latn"
    assert unicodedata.script(unichr(0)) == "Zyyy"
    assert unicodedata.script(unichr(0x0378)) == "Zzzz"
    assert unicodedata.script(unichr(0x10FFFF)) == "Zzzz"

    # these were randomly sampled, one character per script
    assert unicodedata.script(unichr(0x1E918)) == 'Adlm'
    assert unicodedata.script(unichr(0x1170D)) == 'Ahom'
    assert unicodedata.script(unichr(0x145A0)) == 'Hluw'
    assert unicodedata.script(unichr(0x0607)) == 'Arab'
    assert unicodedata.script(unichr(0x056C)) == 'Armn'
    assert unicodedata.script(unichr(0x10B27)) == 'Avst'
    assert unicodedata.script(unichr(0x1B41)) == 'Bali'
    assert unicodedata.script(unichr(0x168AD)) == 'Bamu'
    assert unicodedata.script(unichr(0x16ADD)) == 'Bass'
    assert unicodedata.script(unichr(0x1BE5)) == 'Batk'
    assert unicodedata.script(unichr(0x09F3)) == 'Beng'
    assert unicodedata.script(unichr(0x11C5B)) == 'Bhks'
    assert unicodedata.script(unichr(0x3126)) == 'Bopo'
    assert unicodedata.script(unichr(0x1103B)) == 'Brah'
    assert unicodedata.script(unichr(0x2849)) == 'Brai'
    assert unicodedata.script(unichr(0x1A0A)) == 'Bugi'
    assert unicodedata.script(unichr(0x174E)) == 'Buhd'
    assert unicodedata.script(unichr(0x18EE)) == 'Cans'
    assert unicodedata.script(unichr(0x102B7)) == 'Cari'
    assert unicodedata.script(unichr(0x1053D)) == 'Aghb'
    assert unicodedata.script(unichr(0x11123)) == 'Cakm'
    assert unicodedata.script(unichr(0xAA1F)) == 'Cham'
    assert unicodedata.script(unichr(0xAB95)) == 'Cher'
    assert unicodedata.script(unichr(0x1F0C7)) == 'Zyyy'
    assert unicodedata.script(unichr(0x2C85)) == 'Copt'
    assert unicodedata.script(unichr(0x12014)) == 'Xsux'
    assert unicodedata.script(unichr(0x1082E)) == 'Cprt'
    assert unicodedata.script(unichr(0xA686)) == 'Cyrl'
    assert unicodedata.script(unichr(0x10417)) == 'Dsrt'
    assert unicodedata.script(unichr(0x093E)) == 'Deva'
    assert unicodedata.script(unichr(0x1BC4B)) == 'Dupl'
    assert unicodedata.script(unichr(0x1310C)) == 'Egyp'
    assert unicodedata.script(unichr(0x1051C)) == 'Elba'
    assert unicodedata.script(unichr(0x2DA6)) == 'Ethi'
    assert unicodedata.script(unichr(0x10AD)) == 'Geor'
    assert unicodedata.script(unichr(0x2C52)) == 'Glag'
    assert unicodedata.script(unichr(0x10343)) == 'Goth'
    assert unicodedata.script(unichr(0x11371)) == 'Gran'
    assert unicodedata.script(unichr(0x03D0)) == 'Grek'
    assert unicodedata.script(unichr(0x0AAA)) == 'Gujr'
    assert unicodedata.script(unichr(0x0A4C)) == 'Guru'
    assert unicodedata.script(unichr(0x23C9F)) == 'Hani'
    assert unicodedata.script(unichr(0xC259)) == 'Hang'
    assert unicodedata.script(unichr(0x1722)) == 'Hano'
    assert unicodedata.script(unichr(0x108F5)) == 'Hatr'
    assert unicodedata.script(unichr(0x05C2)) == 'Hebr'
    assert unicodedata.script(unichr(0x1B072)) == 'Hira'
    assert unicodedata.script(unichr(0x10847)) == 'Armi'
    assert unicodedata.script(unichr(0x033A)) == 'Zinh'
    assert unicodedata.script(unichr(0x10B66)) == 'Phli'
    assert unicodedata.script(unichr(0x10B4B)) == 'Prti'
    assert unicodedata.script(unichr(0xA98A)) == 'Java'
    assert unicodedata.script(unichr(0x110B2)) == 'Kthi'
    assert unicodedata.script(unichr(0x0CC6)) == 'Knda'
    assert unicodedata.script(unichr(0x3337)) == 'Kana'
    assert unicodedata.script(unichr(0xA915)) == 'Kali'
    assert unicodedata.script(unichr(0x10A2E)) == 'Khar'
    assert unicodedata.script(unichr(0x17AA)) == 'Khmr'
    assert unicodedata.script(unichr(0x11225)) == 'Khoj'
    assert unicodedata.script(unichr(0x112B6)) == 'Sind'
    assert unicodedata.script(unichr(0x0ED7)) == 'Laoo'
    assert unicodedata.script(unichr(0xAB3C)) == 'Latn'
    assert unicodedata.script(unichr(0x1C48)) == 'Lepc'
    assert unicodedata.script(unichr(0x1923)) == 'Limb'
    assert unicodedata.script(unichr(0x1071D)) == 'Lina'
    assert unicodedata.script(unichr(0x100EC)) == 'Linb'
    assert unicodedata.script(unichr(0xA4E9)) == 'Lisu'
    assert unicodedata.script(unichr(0x10284)) == 'Lyci'
    assert unicodedata.script(unichr(0x10926)) == 'Lydi'
    assert unicodedata.script(unichr(0x11161)) == 'Mahj'
    assert unicodedata.script(unichr(0x0D56)) == 'Mlym'
    assert unicodedata.script(unichr(0x0856)) == 'Mand'
    assert unicodedata.script(unichr(0x10AF0)) == 'Mani'
    assert unicodedata.script(unichr(0x11CB0)) == 'Marc'
    assert unicodedata.script(unichr(0x11D28)) == 'Gonm'
    assert unicodedata.script(unichr(0xABDD)) == 'Mtei'
    assert unicodedata.script(unichr(0x1E897)) == 'Mend'
    assert unicodedata.script(unichr(0x109B0)) == 'Merc'
    assert unicodedata.script(unichr(0x10993)) == 'Mero'
    assert unicodedata.script(unichr(0x16F5D)) == 'Plrd'
    assert unicodedata.script(unichr(0x1160B)) == 'Modi'
    assert unicodedata.script(unichr(0x18A8)) == 'Mong'
    assert unicodedata.script(unichr(0x16A48)) == 'Mroo'
    assert unicodedata.script(unichr(0x1128C)) == 'Mult'
    assert unicodedata.script(unichr(0x105B)) == 'Mymr'
    assert unicodedata.script(unichr(0x108AF)) == 'Nbat'
    assert unicodedata.script(unichr(0x19B3)) == 'Talu'
    assert unicodedata.script(unichr(0x1143D)) == 'Newa'
    assert unicodedata.script(unichr(0x07F4)) == 'Nkoo'
    assert unicodedata.script(unichr(0x1B192)) == 'Nshu'
    assert unicodedata.script(unichr(0x169C)) == 'Ogam'
    assert unicodedata.script(unichr(0x1C56)) == 'Olck'
    assert unicodedata.script(unichr(0x10CE9)) == 'Hung'
    assert unicodedata.script(unichr(0x10316)) == 'Ital'
    assert unicodedata.script(unichr(0x10A93)) == 'Narb'
    assert unicodedata.script(unichr(0x1035A)) == 'Perm'
    assert unicodedata.script(unichr(0x103D5)) == 'Xpeo'
    assert unicodedata.script(unichr(0x10A65)) == 'Sarb'
    assert unicodedata.script(unichr(0x10C09)) == 'Orkh'
    assert unicodedata.script(unichr(0x0B60)) == 'Orya'
    assert unicodedata.script(unichr(0x104CF)) == 'Osge'
    assert unicodedata.script(unichr(0x104A8)) == 'Osma'
    assert unicodedata.script(unichr(0x16B12)) == 'Hmng'
    assert unicodedata.script(unichr(0x10879)) == 'Palm'
    assert unicodedata.script(unichr(0x11AF1)) == 'Pauc'
    assert unicodedata.script(unichr(0xA869)) == 'Phag'
    assert unicodedata.script(unichr(0x10909)) == 'Phnx'
    assert unicodedata.script(unichr(0x10B81)) == 'Phlp'
    assert unicodedata.script(unichr(0xA941)) == 'Rjng'
    assert unicodedata.script(unichr(0x16C3)) == 'Runr'
    assert unicodedata.script(unichr(0x0814)) == 'Samr'
    assert unicodedata.script(unichr(0xA88C)) == 'Saur'
    assert unicodedata.script(unichr(0x111C8)) == 'Shrd'
    assert unicodedata.script(unichr(0x1045F)) == 'Shaw'
    assert unicodedata.script(unichr(0x115AD)) == 'Sidd'
    assert unicodedata.script(unichr(0x1D8C0)) == 'Sgnw'
    assert unicodedata.script(unichr(0x0DB9)) == 'Sinh'
    assert unicodedata.script(unichr(0x110F9)) == 'Sora'
    assert unicodedata.script(unichr(0x11A60)) == 'Soyo'
    assert unicodedata.script(unichr(0x1B94)) == 'Sund'
    assert unicodedata.script(unichr(0xA81F)) == 'Sylo'
    assert unicodedata.script(unichr(0x0740)) == 'Syrc'
    assert unicodedata.script(unichr(0x1714)) == 'Tglg'
    assert unicodedata.script(unichr(0x1761)) == 'Tagb'
    assert unicodedata.script(unichr(0x1965)) == 'Tale'
    assert unicodedata.script(unichr(0x1A32)) == 'Lana'
    assert unicodedata.script(unichr(0xAA86)) == 'Tavt'
    assert unicodedata.script(unichr(0x116A5)) == 'Takr'
    assert unicodedata.script(unichr(0x0B8E)) == 'Taml'
    assert unicodedata.script(unichr(0x1754D)) == 'Tang'
    assert unicodedata.script(unichr(0x0C40)) == 'Telu'
    assert unicodedata.script(unichr(0x07A4)) == 'Thaa'
    assert unicodedata.script(unichr(0x0E42)) == 'Thai'
    assert unicodedata.script(unichr(0x0F09)) == 'Tibt'
    assert unicodedata.script(unichr(0x2D3A)) == 'Tfng'
    assert unicodedata.script(unichr(0x114B0)) == 'Tirh'
    assert unicodedata.script(unichr(0x1038B)) == 'Ugar'
    assert unicodedata.script(unichr(0xA585)) == 'Vaii'
    assert unicodedata.script(unichr(0x118CF)) == 'Wara'
    assert unicodedata.script(unichr(0xA066)) == 'Yiii'
    assert unicodedata.script(unichr(0x11A31)) == 'Zanb'


def test_script_extension():
assert unicodedata.script_extension("a") == {"Latin"}
assert unicodedata.script_extension(unichr(0)) == {"Common"}
assert unicodedata.script_extension(unichr(0x0378)) == {"Unknown"}
assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Unknown"}
assert unicodedata.script_extension("a") == {"Latn"}
assert unicodedata.script_extension(unichr(0)) == {"Zyyy"}
assert unicodedata.script_extension(unichr(0x0378)) == {"Zzzz"}
assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Zzzz"}
assert unicodedata.script_extension("\u0660") == {'Arab', 'Thaa'}
assert unicodedata.script_extension("\u0964") == {