diff --git a/Lib/fontTools/unicodedata/Blocks.py b/Lib/fontTools/unicodedata/Blocks.py new file mode 100644 index 000000000..cba4e9e94 --- /dev/null +++ b/Lib/fontTools/unicodedata/Blocks.py @@ -0,0 +1,677 @@ +# -*- coding: utf-8 -*- +# +# NOTE: This file was auto-generated with MetaTools/buildUCD.py. +# Source: https://unicode.org/Public/UNIDATA/Blocks.txt +# License: http://unicode.org/copyright.html#License +# +# Blocks-10.0.0.txt +# Date: 2017-04-12, 17:30:00 GMT [KW] +# © 2017 Unicode®, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Format: +# Start Code..End Code; Block Name + + +RANGES = [ + 0x0000, # .. 0x007F ; Basic Latin + 0x0080, # .. 0x00FF ; Latin-1 Supplement + 0x0100, # .. 0x017F ; Latin Extended-A + 0x0180, # .. 0x024F ; Latin Extended-B + 0x0250, # .. 0x02AF ; IPA Extensions + 0x02B0, # .. 0x02FF ; Spacing Modifier Letters + 0x0300, # .. 0x036F ; Combining Diacritical Marks + 0x0370, # .. 0x03FF ; Greek and Coptic + 0x0400, # .. 0x04FF ; Cyrillic + 0x0500, # .. 0x052F ; Cyrillic Supplement + 0x0530, # .. 0x058F ; Armenian + 0x0590, # .. 0x05FF ; Hebrew + 0x0600, # .. 0x06FF ; Arabic + 0x0700, # .. 0x074F ; Syriac + 0x0750, # .. 0x077F ; Arabic Supplement + 0x0780, # .. 0x07BF ; Thaana + 0x07C0, # .. 0x07FF ; NKo + 0x0800, # .. 0x083F ; Samaritan + 0x0840, # .. 0x085F ; Mandaic + 0x0860, # .. 0x086F ; Syriac Supplement + 0x0870, # .. 0x089F ; No_Block + 0x08A0, # .. 0x08FF ; Arabic Extended-A + 0x0900, # .. 0x097F ; Devanagari + 0x0980, # .. 0x09FF ; Bengali + 0x0A00, # .. 0x0A7F ; Gurmukhi + 0x0A80, # .. 0x0AFF ; Gujarati + 0x0B00, # .. 0x0B7F ; Oriya + 0x0B80, # .. 0x0BFF ; Tamil + 0x0C00, # .. 0x0C7F ; Telugu + 0x0C80, # .. 0x0CFF ; Kannada + 0x0D00, # .. 0x0D7F ; Malayalam + 0x0D80, # .. 0x0DFF ; Sinhala + 0x0E00, # .. 0x0E7F ; Thai + 0x0E80, # .. 0x0EFF ; Lao + 0x0F00, # .. 0x0FFF ; Tibetan + 0x1000, # .. 0x109F ; Myanmar + 0x10A0, # .. 0x10FF ; Georgian + 0x1100, # .. 0x11FF ; Hangul Jamo + 0x1200, # .. 0x137F ; Ethiopic + 0x1380, # .. 0x139F ; Ethiopic Supplement + 0x13A0, # .. 0x13FF ; Cherokee + 0x1400, # .. 0x167F ; Unified Canadian Aboriginal Syllabics + 0x1680, # .. 0x169F ; Ogham + 0x16A0, # .. 0x16FF ; Runic + 0x1700, # .. 0x171F ; Tagalog + 0x1720, # .. 0x173F ; Hanunoo + 0x1740, # .. 0x175F ; Buhid + 0x1760, # .. 0x177F ; Tagbanwa + 0x1780, # .. 0x17FF ; Khmer + 0x1800, # .. 0x18AF ; Mongolian + 0x18B0, # .. 0x18FF ; Unified Canadian Aboriginal Syllabics Extended + 0x1900, # .. 0x194F ; Limbu + 0x1950, # .. 0x197F ; Tai Le + 0x1980, # .. 0x19DF ; New Tai Lue + 0x19E0, # .. 0x19FF ; Khmer Symbols + 0x1A00, # .. 0x1A1F ; Buginese + 0x1A20, # .. 0x1AAF ; Tai Tham + 0x1AB0, # .. 0x1AFF ; Combining Diacritical Marks Extended + 0x1B00, # .. 0x1B7F ; Balinese + 0x1B80, # .. 0x1BBF ; Sundanese + 0x1BC0, # .. 0x1BFF ; Batak + 0x1C00, # .. 0x1C4F ; Lepcha + 0x1C50, # .. 0x1C7F ; Ol Chiki + 0x1C80, # .. 0x1C8F ; Cyrillic Extended-C + 0x1C90, # .. 0x1CBF ; No_Block + 0x1CC0, # .. 0x1CCF ; Sundanese Supplement + 0x1CD0, # .. 0x1CFF ; Vedic Extensions + 0x1D00, # .. 0x1D7F ; Phonetic Extensions + 0x1D80, # .. 0x1DBF ; Phonetic Extensions Supplement + 0x1DC0, # .. 0x1DFF ; Combining Diacritical Marks Supplement + 0x1E00, # .. 0x1EFF ; Latin Extended Additional + 0x1F00, # .. 0x1FFF ; Greek Extended + 0x2000, # .. 0x206F ; General Punctuation + 0x2070, # .. 0x209F ; Superscripts and Subscripts + 0x20A0, # .. 0x20CF ; Currency Symbols + 0x20D0, # .. 0x20FF ; Combining Diacritical Marks for Symbols + 0x2100, # .. 0x214F ; Letterlike Symbols + 0x2150, # .. 0x218F ; Number Forms + 0x2190, # .. 0x21FF ; Arrows + 0x2200, # .. 0x22FF ; Mathematical Operators + 0x2300, # .. 0x23FF ; Miscellaneous Technical + 0x2400, # .. 0x243F ; Control Pictures + 0x2440, # .. 0x245F ; Optical Character Recognition + 0x2460, # .. 0x24FF ; Enclosed Alphanumerics + 0x2500, # .. 0x257F ; Box Drawing + 0x2580, # .. 0x259F ; Block Elements + 0x25A0, # .. 0x25FF ; Geometric Shapes + 0x2600, # .. 0x26FF ; Miscellaneous Symbols + 0x2700, # .. 0x27BF ; Dingbats + 0x27C0, # .. 0x27EF ; Miscellaneous Mathematical Symbols-A + 0x27F0, # .. 0x27FF ; Supplemental Arrows-A + 0x2800, # .. 0x28FF ; Braille Patterns + 0x2900, # .. 0x297F ; Supplemental Arrows-B + 0x2980, # .. 0x29FF ; Miscellaneous Mathematical Symbols-B + 0x2A00, # .. 0x2AFF ; Supplemental Mathematical Operators + 0x2B00, # .. 0x2BFF ; Miscellaneous Symbols and Arrows + 0x2C00, # .. 0x2C5F ; Glagolitic + 0x2C60, # .. 0x2C7F ; Latin Extended-C + 0x2C80, # .. 0x2CFF ; Coptic + 0x2D00, # .. 0x2D2F ; Georgian Supplement + 0x2D30, # .. 0x2D7F ; Tifinagh + 0x2D80, # .. 0x2DDF ; Ethiopic Extended + 0x2DE0, # .. 0x2DFF ; Cyrillic Extended-A + 0x2E00, # .. 0x2E7F ; Supplemental Punctuation + 0x2E80, # .. 0x2EFF ; CJK Radicals Supplement + 0x2F00, # .. 0x2FDF ; Kangxi Radicals + 0x2FE0, # .. 0x2FEF ; No_Block + 0x2FF0, # .. 0x2FFF ; Ideographic Description Characters + 0x3000, # .. 0x303F ; CJK Symbols and Punctuation + 0x3040, # .. 0x309F ; Hiragana + 0x30A0, # .. 0x30FF ; Katakana + 0x3100, # .. 0x312F ; Bopomofo + 0x3130, # .. 0x318F ; Hangul Compatibility Jamo + 0x3190, # .. 0x319F ; Kanbun + 0x31A0, # .. 0x31BF ; Bopomofo Extended + 0x31C0, # .. 0x31EF ; CJK Strokes + 0x31F0, # .. 0x31FF ; Katakana Phonetic Extensions + 0x3200, # .. 0x32FF ; Enclosed CJK Letters and Months + 0x3300, # .. 0x33FF ; CJK Compatibility + 0x3400, # .. 0x4DBF ; CJK Unified Ideographs Extension A + 0x4DC0, # .. 0x4DFF ; Yijing Hexagram Symbols + 0x4E00, # .. 0x9FFF ; CJK Unified Ideographs + 0xA000, # .. 0xA48F ; Yi Syllables + 0xA490, # .. 0xA4CF ; Yi Radicals + 0xA4D0, # .. 0xA4FF ; Lisu + 0xA500, # .. 0xA63F ; Vai + 0xA640, # .. 0xA69F ; Cyrillic Extended-B + 0xA6A0, # .. 0xA6FF ; Bamum + 0xA700, # .. 0xA71F ; Modifier Tone Letters + 0xA720, # .. 0xA7FF ; Latin Extended-D + 0xA800, # .. 0xA82F ; Syloti Nagri + 0xA830, # .. 0xA83F ; Common Indic Number Forms + 0xA840, # .. 0xA87F ; Phags-pa + 0xA880, # .. 0xA8DF ; Saurashtra + 0xA8E0, # .. 0xA8FF ; Devanagari Extended + 0xA900, # .. 0xA92F ; Kayah Li + 0xA930, # .. 0xA95F ; Rejang + 0xA960, # .. 0xA97F ; Hangul Jamo Extended-A + 0xA980, # .. 0xA9DF ; Javanese + 0xA9E0, # .. 0xA9FF ; Myanmar Extended-B + 0xAA00, # .. 0xAA5F ; Cham + 0xAA60, # .. 0xAA7F ; Myanmar Extended-A + 0xAA80, # .. 0xAADF ; Tai Viet + 0xAAE0, # .. 0xAAFF ; Meetei Mayek Extensions + 0xAB00, # .. 0xAB2F ; Ethiopic Extended-A + 0xAB30, # .. 0xAB6F ; Latin Extended-E + 0xAB70, # .. 0xABBF ; Cherokee Supplement + 0xABC0, # .. 0xABFF ; Meetei Mayek + 0xAC00, # .. 0xD7AF ; Hangul Syllables + 0xD7B0, # .. 0xD7FF ; Hangul Jamo Extended-B + 0xD800, # .. 0xDB7F ; High Surrogates + 0xDB80, # .. 0xDBFF ; High Private Use Surrogates + 0xDC00, # .. 0xDFFF ; Low Surrogates + 0xE000, # .. 0xF8FF ; Private Use Area + 0xF900, # .. 0xFAFF ; CJK Compatibility Ideographs + 0xFB00, # .. 0xFB4F ; Alphabetic Presentation Forms + 0xFB50, # .. 0xFDFF ; Arabic Presentation Forms-A + 0xFE00, # .. 0xFE0F ; Variation Selectors + 0xFE10, # .. 0xFE1F ; Vertical Forms + 0xFE20, # .. 0xFE2F ; Combining Half Marks + 0xFE30, # .. 0xFE4F ; CJK Compatibility Forms + 0xFE50, # .. 0xFE6F ; Small Form Variants + 0xFE70, # .. 0xFEFF ; Arabic Presentation Forms-B + 0xFF00, # .. 0xFFEF ; Halfwidth and Fullwidth Forms + 0xFFF0, # .. 0xFFFF ; Specials + 0x10000, # .. 0x1007F ; Linear B Syllabary + 0x10080, # .. 0x100FF ; Linear B Ideograms + 0x10100, # .. 0x1013F ; Aegean Numbers + 0x10140, # .. 0x1018F ; Ancient Greek Numbers + 0x10190, # .. 0x101CF ; Ancient Symbols + 0x101D0, # .. 0x101FF ; Phaistos Disc + 0x10200, # .. 0x1027F ; No_Block + 0x10280, # .. 0x1029F ; Lycian + 0x102A0, # .. 0x102DF ; Carian + 0x102E0, # .. 0x102FF ; Coptic Epact Numbers + 0x10300, # .. 0x1032F ; Old Italic + 0x10330, # .. 0x1034F ; Gothic + 0x10350, # .. 0x1037F ; Old Permic + 0x10380, # .. 0x1039F ; Ugaritic + 0x103A0, # .. 0x103DF ; Old Persian + 0x103E0, # .. 0x103FF ; No_Block + 0x10400, # .. 0x1044F ; Deseret + 0x10450, # .. 0x1047F ; Shavian + 0x10480, # .. 0x104AF ; Osmanya + 0x104B0, # .. 0x104FF ; Osage + 0x10500, # .. 0x1052F ; Elbasan + 0x10530, # .. 0x1056F ; Caucasian Albanian + 0x10570, # .. 0x105FF ; No_Block + 0x10600, # .. 0x1077F ; Linear A + 0x10780, # .. 0x107FF ; No_Block + 0x10800, # .. 0x1083F ; Cypriot Syllabary + 0x10840, # .. 0x1085F ; Imperial Aramaic + 0x10860, # .. 0x1087F ; Palmyrene + 0x10880, # .. 0x108AF ; Nabataean + 0x108B0, # .. 0x108DF ; No_Block + 0x108E0, # .. 0x108FF ; Hatran + 0x10900, # .. 0x1091F ; Phoenician + 0x10920, # .. 0x1093F ; Lydian + 0x10940, # .. 0x1097F ; No_Block + 0x10980, # .. 0x1099F ; Meroitic Hieroglyphs + 0x109A0, # .. 0x109FF ; Meroitic Cursive + 0x10A00, # .. 0x10A5F ; Kharoshthi + 0x10A60, # .. 0x10A7F ; Old South Arabian + 0x10A80, # .. 0x10A9F ; Old North Arabian + 0x10AA0, # .. 0x10ABF ; No_Block + 0x10AC0, # .. 0x10AFF ; Manichaean + 0x10B00, # .. 0x10B3F ; Avestan + 0x10B40, # .. 0x10B5F ; Inscriptional Parthian + 0x10B60, # .. 0x10B7F ; Inscriptional Pahlavi + 0x10B80, # .. 0x10BAF ; Psalter Pahlavi + 0x10BB0, # .. 0x10BFF ; No_Block + 0x10C00, # .. 0x10C4F ; Old Turkic + 0x10C50, # .. 0x10C7F ; No_Block + 0x10C80, # .. 0x10CFF ; Old Hungarian + 0x10D00, # .. 0x10E5F ; No_Block + 0x10E60, # .. 0x10E7F ; Rumi Numeral Symbols + 0x10E80, # .. 0x10FFF ; No_Block + 0x11000, # .. 0x1107F ; Brahmi + 0x11080, # .. 0x110CF ; Kaithi + 0x110D0, # .. 0x110FF ; Sora Sompeng + 0x11100, # .. 0x1114F ; Chakma + 0x11150, # .. 0x1117F ; Mahajani + 0x11180, # .. 0x111DF ; Sharada + 0x111E0, # .. 0x111FF ; Sinhala Archaic Numbers + 0x11200, # .. 0x1124F ; Khojki + 0x11250, # .. 0x1127F ; No_Block + 0x11280, # .. 0x112AF ; Multani + 0x112B0, # .. 0x112FF ; Khudawadi + 0x11300, # .. 0x1137F ; Grantha + 0x11380, # .. 0x113FF ; No_Block + 0x11400, # .. 0x1147F ; Newa + 0x11480, # .. 0x114DF ; Tirhuta + 0x114E0, # .. 0x1157F ; No_Block + 0x11580, # .. 0x115FF ; Siddham + 0x11600, # .. 0x1165F ; Modi + 0x11660, # .. 0x1167F ; Mongolian Supplement + 0x11680, # .. 0x116CF ; Takri + 0x116D0, # .. 0x116FF ; No_Block + 0x11700, # .. 0x1173F ; Ahom + 0x11740, # .. 0x1189F ; No_Block + 0x118A0, # .. 0x118FF ; Warang Citi + 0x11900, # .. 0x119FF ; No_Block + 0x11A00, # .. 0x11A4F ; Zanabazar Square + 0x11A50, # .. 0x11AAF ; Soyombo + 0x11AB0, # .. 0x11ABF ; No_Block + 0x11AC0, # .. 0x11AFF ; Pau Cin Hau + 0x11B00, # .. 0x11BFF ; No_Block + 0x11C00, # .. 0x11C6F ; Bhaiksuki + 0x11C70, # .. 0x11CBF ; Marchen + 0x11CC0, # .. 0x11CFF ; No_Block + 0x11D00, # .. 0x11D5F ; Masaram Gondi + 0x11D60, # .. 0x11FFF ; No_Block + 0x12000, # .. 0x123FF ; Cuneiform + 0x12400, # .. 0x1247F ; Cuneiform Numbers and Punctuation + 0x12480, # .. 0x1254F ; Early Dynastic Cuneiform + 0x12550, # .. 0x12FFF ; No_Block + 0x13000, # .. 0x1342F ; Egyptian Hieroglyphs + 0x13430, # .. 0x143FF ; No_Block + 0x14400, # .. 0x1467F ; Anatolian Hieroglyphs + 0x14680, # .. 0x167FF ; No_Block + 0x16800, # .. 0x16A3F ; Bamum Supplement + 0x16A40, # .. 0x16A6F ; Mro + 0x16A70, # .. 0x16ACF ; No_Block + 0x16AD0, # .. 0x16AFF ; Bassa Vah + 0x16B00, # .. 0x16B8F ; Pahawh Hmong + 0x16B90, # .. 0x16EFF ; No_Block + 0x16F00, # .. 0x16F9F ; Miao + 0x16FA0, # .. 0x16FDF ; No_Block + 0x16FE0, # .. 0x16FFF ; Ideographic Symbols and Punctuation + 0x17000, # .. 0x187FF ; Tangut + 0x18800, # .. 0x18AFF ; Tangut Components + 0x18B00, # .. 0x1AFFF ; No_Block + 0x1B000, # .. 0x1B0FF ; Kana Supplement + 0x1B100, # .. 0x1B12F ; Kana Extended-A + 0x1B130, # .. 0x1B16F ; No_Block + 0x1B170, # .. 0x1B2FF ; Nushu + 0x1B300, # .. 0x1BBFF ; No_Block + 0x1BC00, # .. 0x1BC9F ; Duployan + 0x1BCA0, # .. 0x1BCAF ; Shorthand Format Controls + 0x1BCB0, # .. 0x1CFFF ; No_Block + 0x1D000, # .. 0x1D0FF ; Byzantine Musical Symbols + 0x1D100, # .. 0x1D1FF ; Musical Symbols + 0x1D200, # .. 0x1D24F ; Ancient Greek Musical Notation + 0x1D250, # .. 0x1D2FF ; No_Block + 0x1D300, # .. 0x1D35F ; Tai Xuan Jing Symbols + 0x1D360, # .. 0x1D37F ; Counting Rod Numerals + 0x1D380, # .. 0x1D3FF ; No_Block + 0x1D400, # .. 0x1D7FF ; Mathematical Alphanumeric Symbols + 0x1D800, # .. 0x1DAAF ; Sutton SignWriting + 0x1DAB0, # .. 0x1DFFF ; No_Block + 0x1E000, # .. 0x1E02F ; Glagolitic Supplement + 0x1E030, # .. 0x1E7FF ; No_Block + 0x1E800, # .. 0x1E8DF ; Mende Kikakui + 0x1E8E0, # .. 0x1E8FF ; No_Block + 0x1E900, # .. 0x1E95F ; Adlam + 0x1E960, # .. 0x1EDFF ; No_Block + 0x1EE00, # .. 0x1EEFF ; Arabic Mathematical Alphabetic Symbols + 0x1EF00, # .. 0x1EFFF ; No_Block + 0x1F000, # .. 0x1F02F ; Mahjong Tiles + 0x1F030, # .. 0x1F09F ; Domino Tiles + 0x1F0A0, # .. 0x1F0FF ; Playing Cards + 0x1F100, # .. 0x1F1FF ; Enclosed Alphanumeric Supplement + 0x1F200, # .. 0x1F2FF ; Enclosed Ideographic Supplement + 0x1F300, # .. 0x1F5FF ; Miscellaneous Symbols and Pictographs + 0x1F600, # .. 0x1F64F ; Emoticons + 0x1F650, # .. 0x1F67F ; Ornamental Dingbats + 0x1F680, # .. 0x1F6FF ; Transport and Map Symbols + 0x1F700, # .. 0x1F77F ; Alchemical Symbols + 0x1F780, # .. 0x1F7FF ; Geometric Shapes Extended + 0x1F800, # .. 0x1F8FF ; Supplemental Arrows-C + 0x1F900, # .. 0x1F9FF ; Supplemental Symbols and Pictographs + 0x1FA00, # .. 0x1FFFF ; No_Block + 0x20000, # .. 0x2A6DF ; CJK Unified Ideographs Extension B + 0x2A6E0, # .. 0x2A6FF ; No_Block + 0x2A700, # .. 0x2B73F ; CJK Unified Ideographs Extension C + 0x2B740, # .. 0x2B81F ; CJK Unified Ideographs Extension D + 0x2B820, # .. 0x2CEAF ; CJK Unified Ideographs Extension E + 0x2CEB0, # .. 0x2EBEF ; CJK Unified Ideographs Extension F + 0x2EBF0, # .. 0x2F7FF ; No_Block + 0x2F800, # .. 0x2FA1F ; CJK Compatibility Ideographs Supplement + 0x2FA20, # .. 0xDFFFF ; No_Block + 0xE0000, # .. 0xE007F ; Tags + 0xE0080, # .. 0xE00FF ; No_Block + 0xE0100, # .. 0xE01EF ; Variation Selectors Supplement + 0xE01F0, # .. 0xEFFFF ; No_Block + 0xF0000, # .. 0xFFFFF ; Supplementary Private Use Area-A + 0x100000, # .. 0x10FFFF ; Supplementary Private Use Area-B +] + +VALUES = [ + 'Basic Latin', # 0000..007F + 'Latin-1 Supplement', # 0080..00FF + 'Latin Extended-A', # 0100..017F + 'Latin Extended-B', # 0180..024F + 'IPA Extensions', # 0250..02AF + 'Spacing Modifier Letters', # 02B0..02FF + 'Combining Diacritical Marks', # 0300..036F + 'Greek and Coptic', # 0370..03FF + 'Cyrillic', # 0400..04FF + 'Cyrillic Supplement', # 0500..052F + 'Armenian', # 0530..058F + 'Hebrew', # 0590..05FF + 'Arabic', # 0600..06FF + 'Syriac', # 0700..074F + 'Arabic Supplement', # 0750..077F + 'Thaana', # 0780..07BF + 'NKo', # 07C0..07FF + 'Samaritan', # 0800..083F + 'Mandaic', # 0840..085F + 'Syriac Supplement', # 0860..086F + 'No_Block', # 0870..089F + 'Arabic Extended-A', # 08A0..08FF + 'Devanagari', # 0900..097F + 'Bengali', # 0980..09FF + 'Gurmukhi', # 0A00..0A7F + 'Gujarati', # 0A80..0AFF + 'Oriya', # 0B00..0B7F + 'Tamil', # 0B80..0BFF + 'Telugu', # 0C00..0C7F + 'Kannada', # 0C80..0CFF + 'Malayalam', # 0D00..0D7F + 'Sinhala', # 0D80..0DFF + 'Thai', # 0E00..0E7F + 'Lao', # 0E80..0EFF + 'Tibetan', # 0F00..0FFF + 'Myanmar', # 1000..109F + 'Georgian', # 10A0..10FF + 'Hangul Jamo', # 1100..11FF + 'Ethiopic', # 1200..137F + 'Ethiopic Supplement', # 1380..139F + 'Cherokee', # 13A0..13FF + 'Unified Canadian Aboriginal Syllabics', # 1400..167F + 'Ogham', # 1680..169F + 'Runic', # 16A0..16FF + 'Tagalog', # 1700..171F + 'Hanunoo', # 1720..173F + 'Buhid', # 1740..175F + 'Tagbanwa', # 1760..177F + 'Khmer', # 1780..17FF + 'Mongolian', # 1800..18AF + 'Unified Canadian Aboriginal Syllabics Extended', # 18B0..18FF + 'Limbu', # 1900..194F + 'Tai Le', # 1950..197F + 'New Tai Lue', # 1980..19DF + 'Khmer Symbols', # 19E0..19FF + 'Buginese', # 1A00..1A1F + 'Tai Tham', # 1A20..1AAF + 'Combining Diacritical Marks Extended', # 1AB0..1AFF + 'Balinese', # 1B00..1B7F + 'Sundanese', # 1B80..1BBF + 'Batak', # 1BC0..1BFF + 'Lepcha', # 1C00..1C4F + 'Ol Chiki', # 1C50..1C7F + 'Cyrillic Extended-C', # 1C80..1C8F + 'No_Block', # 1C90..1CBF + 'Sundanese Supplement', # 1CC0..1CCF + 'Vedic Extensions', # 1CD0..1CFF + 'Phonetic Extensions', # 1D00..1D7F + 'Phonetic Extensions Supplement', # 1D80..1DBF + 'Combining Diacritical Marks Supplement', # 1DC0..1DFF + 'Latin Extended Additional', # 1E00..1EFF + 'Greek Extended', # 1F00..1FFF + 'General Punctuation', # 2000..206F + 'Superscripts and Subscripts', # 2070..209F + 'Currency Symbols', # 20A0..20CF + 'Combining Diacritical Marks for Symbols', # 20D0..20FF + 'Letterlike Symbols', # 2100..214F + 'Number Forms', # 2150..218F + 'Arrows', # 2190..21FF + 'Mathematical Operators', # 2200..22FF + 'Miscellaneous Technical', # 2300..23FF + 'Control Pictures', # 2400..243F + 'Optical Character Recognition', # 2440..245F + 'Enclosed Alphanumerics', # 2460..24FF + 'Box Drawing', # 2500..257F + 'Block Elements', # 2580..259F + 'Geometric Shapes', # 25A0..25FF + 'Miscellaneous Symbols', # 2600..26FF + 'Dingbats', # 2700..27BF + 'Miscellaneous Mathematical Symbols-A', # 27C0..27EF + 'Supplemental Arrows-A', # 27F0..27FF + 'Braille Patterns', # 2800..28FF + 'Supplemental Arrows-B', # 2900..297F + 'Miscellaneous Mathematical Symbols-B', # 2980..29FF + 'Supplemental Mathematical Operators', # 2A00..2AFF + 'Miscellaneous Symbols and Arrows', # 2B00..2BFF + 'Glagolitic', # 2C00..2C5F + 'Latin Extended-C', # 2C60..2C7F + 'Coptic', # 2C80..2CFF + 'Georgian Supplement', # 2D00..2D2F + 'Tifinagh', # 2D30..2D7F + 'Ethiopic Extended', # 2D80..2DDF + 'Cyrillic Extended-A', # 2DE0..2DFF + 'Supplemental Punctuation', # 2E00..2E7F + 'CJK Radicals Supplement', # 2E80..2EFF + 'Kangxi Radicals', # 2F00..2FDF + 'No_Block', # 2FE0..2FEF + 'Ideographic Description Characters', # 2FF0..2FFF + 'CJK Symbols and Punctuation', # 3000..303F + 'Hiragana', # 3040..309F + 'Katakana', # 30A0..30FF + 'Bopomofo', # 3100..312F + 'Hangul Compatibility Jamo', # 3130..318F + 'Kanbun', # 3190..319F + 'Bopomofo Extended', # 31A0..31BF + 'CJK Strokes', # 31C0..31EF + 'Katakana Phonetic Extensions', # 31F0..31FF + 'Enclosed CJK Letters and Months', # 3200..32FF + 'CJK Compatibility', # 3300..33FF + 'CJK Unified Ideographs Extension A', # 3400..4DBF + 'Yijing Hexagram Symbols', # 4DC0..4DFF + 'CJK Unified Ideographs', # 4E00..9FFF + 'Yi Syllables', # A000..A48F + 'Yi Radicals', # A490..A4CF + 'Lisu', # A4D0..A4FF + 'Vai', # A500..A63F + 'Cyrillic Extended-B', # A640..A69F + 'Bamum', # A6A0..A6FF + 'Modifier Tone Letters', # A700..A71F + 'Latin Extended-D', # A720..A7FF + 'Syloti Nagri', # A800..A82F + 'Common Indic Number Forms', # A830..A83F + 'Phags-pa', # A840..A87F + 'Saurashtra', # A880..A8DF + 'Devanagari Extended', # A8E0..A8FF + 'Kayah Li', # A900..A92F + 'Rejang', # A930..A95F + 'Hangul Jamo Extended-A', # A960..A97F + 'Javanese', # A980..A9DF + 'Myanmar Extended-B', # A9E0..A9FF + 'Cham', # AA00..AA5F + 'Myanmar Extended-A', # AA60..AA7F + 'Tai Viet', # AA80..AADF + 'Meetei Mayek Extensions', # AAE0..AAFF + 'Ethiopic Extended-A', # AB00..AB2F + 'Latin Extended-E', # AB30..AB6F + 'Cherokee Supplement', # AB70..ABBF + 'Meetei Mayek', # ABC0..ABFF + 'Hangul Syllables', # AC00..D7AF + 'Hangul Jamo Extended-B', # D7B0..D7FF + 'High Surrogates', # D800..DB7F + 'High Private Use Surrogates', # DB80..DBFF + 'Low Surrogates', # DC00..DFFF + 'Private Use Area', # E000..F8FF + 'CJK Compatibility Ideographs', # F900..FAFF + 'Alphabetic Presentation Forms', # FB00..FB4F + 'Arabic Presentation Forms-A', # FB50..FDFF + 'Variation Selectors', # FE00..FE0F + 'Vertical Forms', # FE10..FE1F + 'Combining Half Marks', # FE20..FE2F + 'CJK Compatibility Forms', # FE30..FE4F + 'Small Form Variants', # FE50..FE6F + 'Arabic Presentation Forms-B', # FE70..FEFF + 'Halfwidth and Fullwidth Forms', # FF00..FFEF + 'Specials', # FFF0..FFFF + 'Linear B Syllabary', # 10000..1007F + 'Linear B Ideograms', # 10080..100FF + 'Aegean Numbers', # 10100..1013F + 'Ancient Greek Numbers', # 10140..1018F + 'Ancient Symbols', # 10190..101CF + 'Phaistos Disc', # 101D0..101FF + 'No_Block', # 10200..1027F + 'Lycian', # 10280..1029F + 'Carian', # 102A0..102DF + 'Coptic Epact Numbers', # 102E0..102FF + 'Old Italic', # 10300..1032F + 'Gothic', # 10330..1034F + 'Old Permic', # 10350..1037F + 'Ugaritic', # 10380..1039F + 'Old Persian', # 103A0..103DF + 'No_Block', # 103E0..103FF + 'Deseret', # 10400..1044F + 'Shavian', # 10450..1047F + 'Osmanya', # 10480..104AF + 'Osage', # 104B0..104FF + 'Elbasan', # 10500..1052F + 'Caucasian Albanian', # 10530..1056F + 'No_Block', # 10570..105FF + 'Linear A', # 10600..1077F + 'No_Block', # 10780..107FF + 'Cypriot Syllabary', # 10800..1083F + 'Imperial Aramaic', # 10840..1085F + 'Palmyrene', # 10860..1087F + 'Nabataean', # 10880..108AF + 'No_Block', # 108B0..108DF + 'Hatran', # 108E0..108FF + 'Phoenician', # 10900..1091F + 'Lydian', # 10920..1093F + 'No_Block', # 10940..1097F + 'Meroitic Hieroglyphs', # 10980..1099F + 'Meroitic Cursive', # 109A0..109FF + 'Kharoshthi', # 10A00..10A5F + 'Old South Arabian', # 10A60..10A7F + 'Old North Arabian', # 10A80..10A9F + 'No_Block', # 10AA0..10ABF + 'Manichaean', # 10AC0..10AFF + 'Avestan', # 10B00..10B3F + 'Inscriptional Parthian', # 10B40..10B5F + 'Inscriptional Pahlavi', # 10B60..10B7F + 'Psalter Pahlavi', # 10B80..10BAF + 'No_Block', # 10BB0..10BFF + 'Old Turkic', # 10C00..10C4F + 'No_Block', # 10C50..10C7F + 'Old Hungarian', # 10C80..10CFF + 'No_Block', # 10D00..10E5F + 'Rumi Numeral Symbols', # 10E60..10E7F + 'No_Block', # 10E80..10FFF + 'Brahmi', # 11000..1107F + 'Kaithi', # 11080..110CF + 'Sora Sompeng', # 110D0..110FF + 'Chakma', # 11100..1114F + 'Mahajani', # 11150..1117F + 'Sharada', # 11180..111DF + 'Sinhala Archaic Numbers', # 111E0..111FF + 'Khojki', # 11200..1124F + 'No_Block', # 11250..1127F + 'Multani', # 11280..112AF + 'Khudawadi', # 112B0..112FF + 'Grantha', # 11300..1137F + 'No_Block', # 11380..113FF + 'Newa', # 11400..1147F + 'Tirhuta', # 11480..114DF + 'No_Block', # 114E0..1157F + 'Siddham', # 11580..115FF + 'Modi', # 11600..1165F + 'Mongolian Supplement', # 11660..1167F + 'Takri', # 11680..116CF + 'No_Block', # 116D0..116FF + 'Ahom', # 11700..1173F + 'No_Block', # 11740..1189F + 'Warang Citi', # 118A0..118FF + 'No_Block', # 11900..119FF + 'Zanabazar Square', # 11A00..11A4F + 'Soyombo', # 11A50..11AAF + 'No_Block', # 11AB0..11ABF + 'Pau Cin Hau', # 11AC0..11AFF + 'No_Block', # 11B00..11BFF + 'Bhaiksuki', # 11C00..11C6F + 'Marchen', # 11C70..11CBF + 'No_Block', # 11CC0..11CFF + 'Masaram Gondi', # 11D00..11D5F + 'No_Block', # 11D60..11FFF + 'Cuneiform', # 12000..123FF + 'Cuneiform Numbers and Punctuation', # 12400..1247F + 'Early Dynastic Cuneiform', # 12480..1254F + 'No_Block', # 12550..12FFF + 'Egyptian Hieroglyphs', # 13000..1342F + 'No_Block', # 13430..143FF + 'Anatolian Hieroglyphs', # 14400..1467F + 'No_Block', # 14680..167FF + 'Bamum Supplement', # 16800..16A3F + 'Mro', # 16A40..16A6F + 'No_Block', # 16A70..16ACF + 'Bassa Vah', # 16AD0..16AFF + 'Pahawh Hmong', # 16B00..16B8F + 'No_Block', # 16B90..16EFF + 'Miao', # 16F00..16F9F + 'No_Block', # 16FA0..16FDF + 'Ideographic Symbols and Punctuation', # 16FE0..16FFF + 'Tangut', # 17000..187FF + 'Tangut Components', # 18800..18AFF + 'No_Block', # 18B00..1AFFF + 'Kana Supplement', # 1B000..1B0FF + 'Kana Extended-A', # 1B100..1B12F + 'No_Block', # 1B130..1B16F + 'Nushu', # 1B170..1B2FF + 'No_Block', # 1B300..1BBFF + 'Duployan', # 1BC00..1BC9F + 'Shorthand Format Controls', # 1BCA0..1BCAF + 'No_Block', # 1BCB0..1CFFF + 'Byzantine Musical Symbols', # 1D000..1D0FF + 'Musical Symbols', # 1D100..1D1FF + 'Ancient Greek Musical Notation', # 1D200..1D24F + 'No_Block', # 1D250..1D2FF + 'Tai Xuan Jing Symbols', # 1D300..1D35F + 'Counting Rod Numerals', # 1D360..1D37F + 'No_Block', # 1D380..1D3FF + 'Mathematical Alphanumeric Symbols', # 1D400..1D7FF + 'Sutton SignWriting', # 1D800..1DAAF + 'No_Block', # 1DAB0..1DFFF + 'Glagolitic Supplement', # 1E000..1E02F + 'No_Block', # 1E030..1E7FF + 'Mende Kikakui', # 1E800..1E8DF + 'No_Block', # 1E8E0..1E8FF + 'Adlam', # 1E900..1E95F + 'No_Block', # 1E960..1EDFF + 'Arabic Mathematical Alphabetic Symbols', # 1EE00..1EEFF + 'No_Block', # 1EF00..1EFFF + 'Mahjong Tiles', # 1F000..1F02F + 'Domino Tiles', # 1F030..1F09F + 'Playing Cards', # 1F0A0..1F0FF + 'Enclosed Alphanumeric Supplement', # 1F100..1F1FF + 'Enclosed Ideographic Supplement', # 1F200..1F2FF + 'Miscellaneous Symbols and Pictographs', # 1F300..1F5FF + 'Emoticons', # 1F600..1F64F + 'Ornamental Dingbats', # 1F650..1F67F + 'Transport and Map Symbols', # 1F680..1F6FF + 'Alchemical Symbols', # 1F700..1F77F + 'Geometric Shapes Extended', # 1F780..1F7FF + 'Supplemental Arrows-C', # 1F800..1F8FF + 'Supplemental Symbols and Pictographs', # 1F900..1F9FF + 'No_Block', # 1FA00..1FFFF + 'CJK Unified Ideographs Extension B', # 20000..2A6DF + 'No_Block', # 2A6E0..2A6FF + 'CJK Unified Ideographs Extension C', # 2A700..2B73F + 'CJK Unified Ideographs Extension D', # 2B740..2B81F + 'CJK Unified Ideographs Extension E', # 2B820..2CEAF + 'CJK Unified Ideographs Extension F', # 2CEB0..2EBEF + 'No_Block', # 2EBF0..2F7FF + 'CJK Compatibility Ideographs Supplement', # 2F800..2FA1F + 'No_Block', # 2FA20..DFFFF + 'Tags', # E0000..E007F + 'No_Block', # E0080..E00FF + 'Variation Selectors Supplement', # E0100..E01EF + 'No_Block', # E01F0..EFFFF + 'Supplementary Private Use Area-A', # F0000..FFFFF + 'Supplementary Private Use Area-B', # 100000..10FFFF +] diff --git a/Lib/fontTools/unicodedata/ScriptExtensions.py b/Lib/fontTools/unicodedata/ScriptExtensions.py new file mode 100644 index 000000000..a92cc80c9 --- /dev/null +++ b/Lib/fontTools/unicodedata/ScriptExtensions.py @@ -0,0 +1,389 @@ +# -*- coding: utf-8 -*- +# +# NOTE: This file was auto-generated with MetaTools/buildUCD.py. +# Source: https://unicode.org/Public/UNIDATA/ScriptExtensions.txt +# License: http://unicode.org/copyright.html#License +# +# ScriptExtensions-10.0.0.txt +# Date: 2017-05-31, 01:07:00 GMT [RP] +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# The Script_Extensions property indicates which characters are commonly used +# with more than one script, but with a limited number of scripts. +# For each code point, there is one or more property values. Each such value is a Script property value. +# For more information, see: +# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/ +# Especially the sections: +# http://www.unicode.org/reports/tr24/#Assignment_Script_Values +# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# +# Each Script_Extensions value in this file consists of a set +# of one or more abbreviated Script property values. The ordering of the +# values in that set is not material, but for stability in presentation +# it is given here as alphabetical. +# +# The Script_Extensions values are presented in sorted order in the file. +# They are sorted first by the number of Script property values in their sets, +# and then alphabetically by first differing Script property value. +# +# Following each distinct Script_Extensions value is the list of code +# points associated with that value, listed in code point order. +# +# All code points not explicitly listed for Script_Extensions +# have as their value the corresponding Script property value +# +# @missing: 0000..10FFFF;