Merge pull request #1106 from anthrotype/ucd-scripts
add fontTools.unicodedata, plus a script to fetch "Scripts.txt" from UCD
This commit is contained in:
commit
0be50c922c
677
Lib/fontTools/unicodedata/Blocks.py
Normal file
677
Lib/fontTools/unicodedata/Blocks.py
Normal file
@ -0,0 +1,677 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# NOTE: This file was auto-generated with MetaTools/buildUCD.py.
|
||||||
|
# Source: https://unicode.org/Public/UNIDATA/Blocks.txt
|
||||||
|
# License: http://unicode.org/copyright.html#License
|
||||||
|
#
|
||||||
|
# Blocks-10.0.0.txt
|
||||||
|
# Date: 2017-04-12, 17:30:00 GMT [KW]
|
||||||
|
# © 2017 Unicode®, Inc.
|
||||||
|
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||||
|
#
|
||||||
|
# Unicode Character Database
|
||||||
|
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||||
|
#
|
||||||
|
# Format:
|
||||||
|
# Start Code..End Code; Block Name
|
||||||
|
|
||||||
|
|
||||||
|
RANGES = [
|
||||||
|
0x0000, # .. 0x007F ; Basic Latin
|
||||||
|
0x0080, # .. 0x00FF ; Latin-1 Supplement
|
||||||
|
0x0100, # .. 0x017F ; Latin Extended-A
|
||||||
|
0x0180, # .. 0x024F ; Latin Extended-B
|
||||||
|
0x0250, # .. 0x02AF ; IPA Extensions
|
||||||
|
0x02B0, # .. 0x02FF ; Spacing Modifier Letters
|
||||||
|
0x0300, # .. 0x036F ; Combining Diacritical Marks
|
||||||
|
0x0370, # .. 0x03FF ; Greek and Coptic
|
||||||
|
0x0400, # .. 0x04FF ; Cyrillic
|
||||||
|
0x0500, # .. 0x052F ; Cyrillic Supplement
|
||||||
|
0x0530, # .. 0x058F ; Armenian
|
||||||
|
0x0590, # .. 0x05FF ; Hebrew
|
||||||
|
0x0600, # .. 0x06FF ; Arabic
|
||||||
|
0x0700, # .. 0x074F ; Syriac
|
||||||
|
0x0750, # .. 0x077F ; Arabic Supplement
|
||||||
|
0x0780, # .. 0x07BF ; Thaana
|
||||||
|
0x07C0, # .. 0x07FF ; NKo
|
||||||
|
0x0800, # .. 0x083F ; Samaritan
|
||||||
|
0x0840, # .. 0x085F ; Mandaic
|
||||||
|
0x0860, # .. 0x086F ; Syriac Supplement
|
||||||
|
0x0870, # .. 0x089F ; No_Block
|
||||||
|
0x08A0, # .. 0x08FF ; Arabic Extended-A
|
||||||
|
0x0900, # .. 0x097F ; Devanagari
|
||||||
|
0x0980, # .. 0x09FF ; Bengali
|
||||||
|
0x0A00, # .. 0x0A7F ; Gurmukhi
|
||||||
|
0x0A80, # .. 0x0AFF ; Gujarati
|
||||||
|
0x0B00, # .. 0x0B7F ; Oriya
|
||||||
|
0x0B80, # .. 0x0BFF ; Tamil
|
||||||
|
0x0C00, # .. 0x0C7F ; Telugu
|
||||||
|
0x0C80, # .. 0x0CFF ; Kannada
|
||||||
|
0x0D00, # .. 0x0D7F ; Malayalam
|
||||||
|
0x0D80, # .. 0x0DFF ; Sinhala
|
||||||
|
0x0E00, # .. 0x0E7F ; Thai
|
||||||
|
0x0E80, # .. 0x0EFF ; Lao
|
||||||
|
0x0F00, # .. 0x0FFF ; Tibetan
|
||||||
|
0x1000, # .. 0x109F ; Myanmar
|
||||||
|
0x10A0, # .. 0x10FF ; Georgian
|
||||||
|
0x1100, # .. 0x11FF ; Hangul Jamo
|
||||||
|
0x1200, # .. 0x137F ; Ethiopic
|
||||||
|
0x1380, # .. 0x139F ; Ethiopic Supplement
|
||||||
|
0x13A0, # .. 0x13FF ; Cherokee
|
||||||
|
0x1400, # .. 0x167F ; Unified Canadian Aboriginal Syllabics
|
||||||
|
0x1680, # .. 0x169F ; Ogham
|
||||||
|
0x16A0, # .. 0x16FF ; Runic
|
||||||
|
0x1700, # .. 0x171F ; Tagalog
|
||||||
|
0x1720, # .. 0x173F ; Hanunoo
|
||||||
|
0x1740, # .. 0x175F ; Buhid
|
||||||
|
0x1760, # .. 0x177F ; Tagbanwa
|
||||||
|
0x1780, # .. 0x17FF ; Khmer
|
||||||
|
0x1800, # .. 0x18AF ; Mongolian
|
||||||
|
0x18B0, # .. 0x18FF ; Unified Canadian Aboriginal Syllabics Extended
|
||||||
|
0x1900, # .. 0x194F ; Limbu
|
||||||
|
0x1950, # .. 0x197F ; Tai Le
|
||||||
|
0x1980, # .. 0x19DF ; New Tai Lue
|
||||||
|
0x19E0, # .. 0x19FF ; Khmer Symbols
|
||||||
|
0x1A00, # .. 0x1A1F ; Buginese
|
||||||
|
0x1A20, # .. 0x1AAF ; Tai Tham
|
||||||
|
0x1AB0, # .. 0x1AFF ; Combining Diacritical Marks Extended
|
||||||
|
0x1B00, # .. 0x1B7F ; Balinese
|
||||||
|
0x1B80, # .. 0x1BBF ; Sundanese
|
||||||
|
0x1BC0, # .. 0x1BFF ; Batak
|
||||||
|
0x1C00, # .. 0x1C4F ; Lepcha
|
||||||
|
0x1C50, # .. 0x1C7F ; Ol Chiki
|
||||||
|
0x1C80, # .. 0x1C8F ; Cyrillic Extended-C
|
||||||
|
0x1C90, # .. 0x1CBF ; No_Block
|
||||||
|
0x1CC0, # .. 0x1CCF ; Sundanese Supplement
|
||||||
|
0x1CD0, # .. 0x1CFF ; Vedic Extensions
|
||||||
|
0x1D00, # .. 0x1D7F ; Phonetic Extensions
|
||||||
|
0x1D80, # .. 0x1DBF ; Phonetic Extensions Supplement
|
||||||
|
0x1DC0, # .. 0x1DFF ; Combining Diacritical Marks Supplement
|
||||||
|
0x1E00, # .. 0x1EFF ; Latin Extended Additional
|
||||||
|
0x1F00, # .. 0x1FFF ; Greek Extended
|
||||||
|
0x2000, # .. 0x206F ; General Punctuation
|
||||||
|
0x2070, # .. 0x209F ; Superscripts and Subscripts
|
||||||
|
0x20A0, # .. 0x20CF ; Currency Symbols
|
||||||
|
0x20D0, # .. 0x20FF ; Combining Diacritical Marks for Symbols
|
||||||
|
0x2100, # .. 0x214F ; Letterlike Symbols
|
||||||
|
0x2150, # .. 0x218F ; Number Forms
|
||||||
|
0x2190, # .. 0x21FF ; Arrows
|
||||||
|
0x2200, # .. 0x22FF ; Mathematical Operators
|
||||||
|
0x2300, # .. 0x23FF ; Miscellaneous Technical
|
||||||
|
0x2400, # .. 0x243F ; Control Pictures
|
||||||
|
0x2440, # .. 0x245F ; Optical Character Recognition
|
||||||
|
0x2460, # .. 0x24FF ; Enclosed Alphanumerics
|
||||||
|
0x2500, # .. 0x257F ; Box Drawing
|
||||||
|
0x2580, # .. 0x259F ; Block Elements
|
||||||
|
0x25A0, # .. 0x25FF ; Geometric Shapes
|
||||||
|
0x2600, # .. 0x26FF ; Miscellaneous Symbols
|
||||||
|
0x2700, # .. 0x27BF ; Dingbats
|
||||||
|
0x27C0, # .. 0x27EF ; Miscellaneous Mathematical Symbols-A
|
||||||
|
0x27F0, # .. 0x27FF ; Supplemental Arrows-A
|
||||||
|
0x2800, # .. 0x28FF ; Braille Patterns
|
||||||
|
0x2900, # .. 0x297F ; Supplemental Arrows-B
|
||||||
|
0x2980, # .. 0x29FF ; Miscellaneous Mathematical Symbols-B
|
||||||
|
0x2A00, # .. 0x2AFF ; Supplemental Mathematical Operators
|
||||||
|
0x2B00, # .. 0x2BFF ; Miscellaneous Symbols and Arrows
|
||||||
|
0x2C00, # .. 0x2C5F ; Glagolitic
|
||||||
|
0x2C60, # .. 0x2C7F ; Latin Extended-C
|
||||||
|
0x2C80, # .. 0x2CFF ; Coptic
|
||||||
|
0x2D00, # .. 0x2D2F ; Georgian Supplement
|
||||||
|
0x2D30, # .. 0x2D7F ; Tifinagh
|
||||||
|
0x2D80, # .. 0x2DDF ; Ethiopic Extended
|
||||||
|
0x2DE0, # .. 0x2DFF ; Cyrillic Extended-A
|
||||||
|
0x2E00, # .. 0x2E7F ; Supplemental Punctuation
|
||||||
|
0x2E80, # .. 0x2EFF ; CJK Radicals Supplement
|
||||||
|
0x2F00, # .. 0x2FDF ; Kangxi Radicals
|
||||||
|
0x2FE0, # .. 0x2FEF ; No_Block
|
||||||
|
0x2FF0, # .. 0x2FFF ; Ideographic Description Characters
|
||||||
|
0x3000, # .. 0x303F ; CJK Symbols and Punctuation
|
||||||
|
0x3040, # .. 0x309F ; Hiragana
|
||||||
|
0x30A0, # .. 0x30FF ; Katakana
|
||||||
|
0x3100, # .. 0x312F ; Bopomofo
|
||||||
|
0x3130, # .. 0x318F ; Hangul Compatibility Jamo
|
||||||
|
0x3190, # .. 0x319F ; Kanbun
|
||||||
|
0x31A0, # .. 0x31BF ; Bopomofo Extended
|
||||||
|
0x31C0, # .. 0x31EF ; CJK Strokes
|
||||||
|
0x31F0, # .. 0x31FF ; Katakana Phonetic Extensions
|
||||||
|
0x3200, # .. 0x32FF ; Enclosed CJK Letters and Months
|
||||||
|
0x3300, # .. 0x33FF ; CJK Compatibility
|
||||||
|
0x3400, # .. 0x4DBF ; CJK Unified Ideographs Extension A
|
||||||
|
0x4DC0, # .. 0x4DFF ; Yijing Hexagram Symbols
|
||||||
|
0x4E00, # .. 0x9FFF ; CJK Unified Ideographs
|
||||||
|
0xA000, # .. 0xA48F ; Yi Syllables
|
||||||
|
0xA490, # .. 0xA4CF ; Yi Radicals
|
||||||
|
0xA4D0, # .. 0xA4FF ; Lisu
|
||||||
|
0xA500, # .. 0xA63F ; Vai
|
||||||
|
0xA640, # .. 0xA69F ; Cyrillic Extended-B
|
||||||
|
0xA6A0, # .. 0xA6FF ; Bamum
|
||||||
|
0xA700, # .. 0xA71F ; Modifier Tone Letters
|
||||||
|
0xA720, # .. 0xA7FF ; Latin Extended-D
|
||||||
|
0xA800, # .. 0xA82F ; Syloti Nagri
|
||||||
|
0xA830, # .. 0xA83F ; Common Indic Number Forms
|
||||||
|
0xA840, # .. 0xA87F ; Phags-pa
|
||||||
|
0xA880, # .. 0xA8DF ; Saurashtra
|
||||||
|
0xA8E0, # .. 0xA8FF ; Devanagari Extended
|
||||||
|
0xA900, # .. 0xA92F ; Kayah Li
|
||||||
|
0xA930, # .. 0xA95F ; Rejang
|
||||||
|
0xA960, # .. 0xA97F ; Hangul Jamo Extended-A
|
||||||
|
0xA980, # .. 0xA9DF ; Javanese
|
||||||
|
0xA9E0, # .. 0xA9FF ; Myanmar Extended-B
|
||||||
|
0xAA00, # .. 0xAA5F ; Cham
|
||||||
|
0xAA60, # .. 0xAA7F ; Myanmar Extended-A
|
||||||
|
0xAA80, # .. 0xAADF ; Tai Viet
|
||||||
|
0xAAE0, # .. 0xAAFF ; Meetei Mayek Extensions
|
||||||
|
0xAB00, # .. 0xAB2F ; Ethiopic Extended-A
|
||||||
|
0xAB30, # .. 0xAB6F ; Latin Extended-E
|
||||||
|
0xAB70, # .. 0xABBF ; Cherokee Supplement
|
||||||
|
0xABC0, # .. 0xABFF ; Meetei Mayek
|
||||||
|
0xAC00, # .. 0xD7AF ; Hangul Syllables
|
||||||
|
0xD7B0, # .. 0xD7FF ; Hangul Jamo Extended-B
|
||||||
|
0xD800, # .. 0xDB7F ; High Surrogates
|
||||||
|
0xDB80, # .. 0xDBFF ; High Private Use Surrogates
|
||||||
|
0xDC00, # .. 0xDFFF ; Low Surrogates
|
||||||
|
0xE000, # .. 0xF8FF ; Private Use Area
|
||||||
|
0xF900, # .. 0xFAFF ; CJK Compatibility Ideographs
|
||||||
|
0xFB00, # .. 0xFB4F ; Alphabetic Presentation Forms
|
||||||
|
0xFB50, # .. 0xFDFF ; Arabic Presentation Forms-A
|
||||||
|
0xFE00, # .. 0xFE0F ; Variation Selectors
|
||||||
|
0xFE10, # .. 0xFE1F ; Vertical Forms
|
||||||
|
0xFE20, # .. 0xFE2F ; Combining Half Marks
|
||||||
|
0xFE30, # .. 0xFE4F ; CJK Compatibility Forms
|
||||||
|
0xFE50, # .. 0xFE6F ; Small Form Variants
|
||||||
|
0xFE70, # .. 0xFEFF ; Arabic Presentation Forms-B
|
||||||
|
0xFF00, # .. 0xFFEF ; Halfwidth and Fullwidth Forms
|
||||||
|
0xFFF0, # .. 0xFFFF ; Specials
|
||||||
|
0x10000, # .. 0x1007F ; Linear B Syllabary
|
||||||
|
0x10080, # .. 0x100FF ; Linear B Ideograms
|
||||||
|
0x10100, # .. 0x1013F ; Aegean Numbers
|
||||||
|
0x10140, # .. 0x1018F ; Ancient Greek Numbers
|
||||||
|
0x10190, # .. 0x101CF ; Ancient Symbols
|
||||||
|
0x101D0, # .. 0x101FF ; Phaistos Disc
|
||||||
|
0x10200, # .. 0x1027F ; No_Block
|
||||||
|
0x10280, # .. 0x1029F ; Lycian
|
||||||
|
0x102A0, # .. 0x102DF ; Carian
|
||||||
|
0x102E0, # .. 0x102FF ; Coptic Epact Numbers
|
||||||
|
0x10300, # .. 0x1032F ; Old Italic
|
||||||
|
0x10330, # .. 0x1034F ; Gothic
|
||||||
|
0x10350, # .. 0x1037F ; Old Permic
|
||||||
|
0x10380, # .. 0x1039F ; Ugaritic
|
||||||
|
0x103A0, # .. 0x103DF ; Old Persian
|
||||||
|
0x103E0, # .. 0x103FF ; No_Block
|
||||||
|
0x10400, # .. 0x1044F ; Deseret
|
||||||
|
0x10450, # .. 0x1047F ; Shavian
|
||||||
|
0x10480, # .. 0x104AF ; Osmanya
|
||||||
|
0x104B0, # .. 0x104FF ; Osage
|
||||||
|
0x10500, # .. 0x1052F ; Elbasan
|
||||||
|
0x10530, # .. 0x1056F ; Caucasian Albanian
|
||||||
|
0x10570, # .. 0x105FF ; No_Block
|
||||||
|
0x10600, # .. 0x1077F ; Linear A
|
||||||
|
0x10780, # .. 0x107FF ; No_Block
|
||||||
|
0x10800, # .. 0x1083F ; Cypriot Syllabary
|
||||||
|
0x10840, # .. 0x1085F ; Imperial Aramaic
|
||||||
|
0x10860, # .. 0x1087F ; Palmyrene
|
||||||
|
0x10880, # .. 0x108AF ; Nabataean
|
||||||
|
0x108B0, # .. 0x108DF ; No_Block
|
||||||
|
0x108E0, # .. 0x108FF ; Hatran
|
||||||
|
0x10900, # .. 0x1091F ; Phoenician
|
||||||
|
0x10920, # .. 0x1093F ; Lydian
|
||||||
|
0x10940, # .. 0x1097F ; No_Block
|
||||||
|
0x10980, # .. 0x1099F ; Meroitic Hieroglyphs
|
||||||
|
0x109A0, # .. 0x109FF ; Meroitic Cursive
|
||||||
|
0x10A00, # .. 0x10A5F ; Kharoshthi
|
||||||
|
0x10A60, # .. 0x10A7F ; Old South Arabian
|
||||||
|
0x10A80, # .. 0x10A9F ; Old North Arabian
|
||||||
|
0x10AA0, # .. 0x10ABF ; No_Block
|
||||||
|
0x10AC0, # .. 0x10AFF ; Manichaean
|
||||||
|
0x10B00, # .. 0x10B3F ; Avestan
|
||||||
|
0x10B40, # .. 0x10B5F ; Inscriptional Parthian
|
||||||
|
0x10B60, # .. 0x10B7F ; Inscriptional Pahlavi
|
||||||
|
0x10B80, # .. 0x10BAF ; Psalter Pahlavi
|
||||||
|
0x10BB0, # .. 0x10BFF ; No_Block
|
||||||
|
0x10C00, # .. 0x10C4F ; Old Turkic
|
||||||
|
0x10C50, # .. 0x10C7F ; No_Block
|
||||||
|
0x10C80, # .. 0x10CFF ; Old Hungarian
|
||||||
|
0x10D00, # .. 0x10E5F ; No_Block
|
||||||
|
0x10E60, # .. 0x10E7F ; Rumi Numeral Symbols
|
||||||
|
0x10E80, # .. 0x10FFF ; No_Block
|
||||||
|
0x11000, # .. 0x1107F ; Brahmi
|
||||||
|
0x11080, # .. 0x110CF ; Kaithi
|
||||||
|
0x110D0, # .. 0x110FF ; Sora Sompeng
|
||||||
|
0x11100, # .. 0x1114F ; Chakma
|
||||||
|
0x11150, # .. 0x1117F ; Mahajani
|
||||||
|
0x11180, # .. 0x111DF ; Sharada
|
||||||
|
0x111E0, # .. 0x111FF ; Sinhala Archaic Numbers
|
||||||
|
0x11200, # .. 0x1124F ; Khojki
|
||||||
|
0x11250, # .. 0x1127F ; No_Block
|
||||||
|
0x11280, # .. 0x112AF ; Multani
|
||||||
|
0x112B0, # .. 0x112FF ; Khudawadi
|
||||||
|
0x11300, # .. 0x1137F ; Grantha
|
||||||
|
0x11380, # .. 0x113FF ; No_Block
|
||||||
|
0x11400, # .. 0x1147F ; Newa
|
||||||
|
0x11480, # .. 0x114DF ; Tirhuta
|
||||||
|
0x114E0, # .. 0x1157F ; No_Block
|
||||||
|
0x11580, # .. 0x115FF ; Siddham
|
||||||
|
0x11600, # .. 0x1165F ; Modi
|
||||||
|
0x11660, # .. 0x1167F ; Mongolian Supplement
|
||||||
|
0x11680, # .. 0x116CF ; Takri
|
||||||
|
0x116D0, # .. 0x116FF ; No_Block
|
||||||
|
0x11700, # .. 0x1173F ; Ahom
|
||||||
|
0x11740, # .. 0x1189F ; No_Block
|
||||||
|
0x118A0, # .. 0x118FF ; Warang Citi
|
||||||
|
0x11900, # .. 0x119FF ; No_Block
|
||||||
|
0x11A00, # .. 0x11A4F ; Zanabazar Square
|
||||||
|
0x11A50, # .. 0x11AAF ; Soyombo
|
||||||
|
0x11AB0, # .. 0x11ABF ; No_Block
|
||||||
|
0x11AC0, # .. 0x11AFF ; Pau Cin Hau
|
||||||
|
0x11B00, # .. 0x11BFF ; No_Block
|
||||||
|
0x11C00, # .. 0x11C6F ; Bhaiksuki
|
||||||
|
0x11C70, # .. 0x11CBF ; Marchen
|
||||||
|
0x11CC0, # .. 0x11CFF ; No_Block
|
||||||
|
0x11D00, # .. 0x11D5F ; Masaram Gondi
|
||||||
|
0x11D60, # .. 0x11FFF ; No_Block
|
||||||
|
0x12000, # .. 0x123FF ; Cuneiform
|
||||||
|
0x12400, # .. 0x1247F ; Cuneiform Numbers and Punctuation
|
||||||
|
0x12480, # .. 0x1254F ; Early Dynastic Cuneiform
|
||||||
|
0x12550, # .. 0x12FFF ; No_Block
|
||||||
|
0x13000, # .. 0x1342F ; Egyptian Hieroglyphs
|
||||||
|
0x13430, # .. 0x143FF ; No_Block
|
||||||
|
0x14400, # .. 0x1467F ; Anatolian Hieroglyphs
|
||||||
|
0x14680, # .. 0x167FF ; No_Block
|
||||||
|
0x16800, # .. 0x16A3F ; Bamum Supplement
|
||||||
|
0x16A40, # .. 0x16A6F ; Mro
|
||||||
|
0x16A70, # .. 0x16ACF ; No_Block
|
||||||
|
0x16AD0, # .. 0x16AFF ; Bassa Vah
|
||||||
|
0x16B00, # .. 0x16B8F ; Pahawh Hmong
|
||||||
|
0x16B90, # .. 0x16EFF ; No_Block
|
||||||
|
0x16F00, # .. 0x16F9F ; Miao
|
||||||
|
0x16FA0, # .. 0x16FDF ; No_Block
|
||||||
|
0x16FE0, # .. 0x16FFF ; Ideographic Symbols and Punctuation
|
||||||
|
0x17000, # .. 0x187FF ; Tangut
|
||||||
|
0x18800, # .. 0x18AFF ; Tangut Components
|
||||||
|
0x18B00, # .. 0x1AFFF ; No_Block
|
||||||
|
0x1B000, # .. 0x1B0FF ; Kana Supplement
|
||||||
|
0x1B100, # .. 0x1B12F ; Kana Extended-A
|
||||||
|
0x1B130, # .. 0x1B16F ; No_Block
|
||||||
|
0x1B170, # .. 0x1B2FF ; Nushu
|
||||||
|
0x1B300, # .. 0x1BBFF ; No_Block
|
||||||
|
0x1BC00, # .. 0x1BC9F ; Duployan
|
||||||
|
0x1BCA0, # .. 0x1BCAF ; Shorthand Format Controls
|
||||||
|
0x1BCB0, # .. 0x1CFFF ; No_Block
|
||||||
|
0x1D000, # .. 0x1D0FF ; Byzantine Musical Symbols
|
||||||
|
0x1D100, # .. 0x1D1FF ; Musical Symbols
|
||||||
|
0x1D200, # .. 0x1D24F ; Ancient Greek Musical Notation
|
||||||
|
0x1D250, # .. 0x1D2FF ; No_Block
|
||||||
|
0x1D300, # .. 0x1D35F ; Tai Xuan Jing Symbols
|
||||||
|
0x1D360, # .. 0x1D37F ; Counting Rod Numerals
|
||||||
|
0x1D380, # .. 0x1D3FF ; No_Block
|
||||||
|
0x1D400, # .. 0x1D7FF ; Mathematical Alphanumeric Symbols
|
||||||
|
0x1D800, # .. 0x1DAAF ; Sutton SignWriting
|
||||||
|
0x1DAB0, # .. 0x1DFFF ; No_Block
|
||||||
|
0x1E000, # .. 0x1E02F ; Glagolitic Supplement
|
||||||
|
0x1E030, # .. 0x1E7FF ; No_Block
|
||||||
|
0x1E800, # .. 0x1E8DF ; Mende Kikakui
|
||||||
|
0x1E8E0, # .. 0x1E8FF ; No_Block
|
||||||
|
0x1E900, # .. 0x1E95F ; Adlam
|
||||||
|
0x1E960, # .. 0x1EDFF ; No_Block
|
||||||
|
0x1EE00, # .. 0x1EEFF ; Arabic Mathematical Alphabetic Symbols
|
||||||
|
0x1EF00, # .. 0x1EFFF ; No_Block
|
||||||
|
0x1F000, # .. 0x1F02F ; Mahjong Tiles
|
||||||
|
0x1F030, # .. 0x1F09F ; Domino Tiles
|
||||||
|
0x1F0A0, # .. 0x1F0FF ; Playing Cards
|
||||||
|
0x1F100, # .. 0x1F1FF ; Enclosed Alphanumeric Supplement
|
||||||
|
0x1F200, # .. 0x1F2FF ; Enclosed Ideographic Supplement
|
||||||
|
0x1F300, # .. 0x1F5FF ; Miscellaneous Symbols and Pictographs
|
||||||
|
0x1F600, # .. 0x1F64F ; Emoticons
|
||||||
|
0x1F650, # .. 0x1F67F ; Ornamental Dingbats
|
||||||
|
0x1F680, # .. 0x1F6FF ; Transport and Map Symbols
|
||||||
|
0x1F700, # .. 0x1F77F ; Alchemical Symbols
|
||||||
|
0x1F780, # .. 0x1F7FF ; Geometric Shapes Extended
|
||||||
|
0x1F800, # .. 0x1F8FF ; Supplemental Arrows-C
|
||||||
|
0x1F900, # .. 0x1F9FF ; Supplemental Symbols and Pictographs
|
||||||
|
0x1FA00, # .. 0x1FFFF ; No_Block
|
||||||
|
0x20000, # .. 0x2A6DF ; CJK Unified Ideographs Extension B
|
||||||
|
0x2A6E0, # .. 0x2A6FF ; No_Block
|
||||||
|
0x2A700, # .. 0x2B73F ; CJK Unified Ideographs Extension C
|
||||||
|
0x2B740, # .. 0x2B81F ; CJK Unified Ideographs Extension D
|
||||||
|
0x2B820, # .. 0x2CEAF ; CJK Unified Ideographs Extension E
|
||||||
|
0x2CEB0, # .. 0x2EBEF ; CJK Unified Ideographs Extension F
|
||||||
|
0x2EBF0, # .. 0x2F7FF ; No_Block
|
||||||
|
0x2F800, # .. 0x2FA1F ; CJK Compatibility Ideographs Supplement
|
||||||
|
0x2FA20, # .. 0xDFFFF ; No_Block
|
||||||
|
0xE0000, # .. 0xE007F ; Tags
|
||||||
|
0xE0080, # .. 0xE00FF ; No_Block
|
||||||
|
0xE0100, # .. 0xE01EF ; Variation Selectors Supplement
|
||||||
|
0xE01F0, # .. 0xEFFFF ; No_Block
|
||||||
|
0xF0000, # .. 0xFFFFF ; Supplementary Private Use Area-A
|
||||||
|
0x100000, # .. 0x10FFFF ; Supplementary Private Use Area-B
|
||||||
|
]
|
||||||
|
|
||||||
|
VALUES = [
|
||||||
|
'Basic Latin', # 0000..007F
|
||||||
|
'Latin-1 Supplement', # 0080..00FF
|
||||||
|
'Latin Extended-A', # 0100..017F
|
||||||
|
'Latin Extended-B', # 0180..024F
|
||||||
|
'IPA Extensions', # 0250..02AF
|
||||||
|
'Spacing Modifier Letters', # 02B0..02FF
|
||||||
|
'Combining Diacritical Marks', # 0300..036F
|
||||||
|
'Greek and Coptic', # 0370..03FF
|
||||||
|
'Cyrillic', # 0400..04FF
|
||||||
|
'Cyrillic Supplement', # 0500..052F
|
||||||
|
'Armenian', # 0530..058F
|
||||||
|
'Hebrew', # 0590..05FF
|
||||||
|
'Arabic', # 0600..06FF
|
||||||
|
'Syriac', # 0700..074F
|
||||||
|
'Arabic Supplement', # 0750..077F
|
||||||
|
'Thaana', # 0780..07BF
|
||||||
|
'NKo', # 07C0..07FF
|
||||||
|
'Samaritan', # 0800..083F
|
||||||
|
'Mandaic', # 0840..085F
|
||||||
|
'Syriac Supplement', # 0860..086F
|
||||||
|
'No_Block', # 0870..089F
|
||||||
|
'Arabic Extended-A', # 08A0..08FF
|
||||||
|
'Devanagari', # 0900..097F
|
||||||
|
'Bengali', # 0980..09FF
|
||||||
|
'Gurmukhi', # 0A00..0A7F
|
||||||
|
'Gujarati', # 0A80..0AFF
|
||||||
|
'Oriya', # 0B00..0B7F
|
||||||
|
'Tamil', # 0B80..0BFF
|
||||||
|
'Telugu', # 0C00..0C7F
|
||||||
|
'Kannada', # 0C80..0CFF
|
||||||
|
'Malayalam', # 0D00..0D7F
|
||||||
|
'Sinhala', # 0D80..0DFF
|
||||||
|
'Thai', # 0E00..0E7F
|
||||||
|
'Lao', # 0E80..0EFF
|
||||||
|
'Tibetan', # 0F00..0FFF
|
||||||
|
'Myanmar', # 1000..109F
|
||||||
|
'Georgian', # 10A0..10FF
|
||||||
|
'Hangul Jamo', # 1100..11FF
|
||||||
|
'Ethiopic', # 1200..137F
|
||||||
|
'Ethiopic Supplement', # 1380..139F
|
||||||
|
'Cherokee', # 13A0..13FF
|
||||||
|
'Unified Canadian Aboriginal Syllabics', # 1400..167F
|
||||||
|
'Ogham', # 1680..169F
|
||||||
|
'Runic', # 16A0..16FF
|
||||||
|
'Tagalog', # 1700..171F
|
||||||
|
'Hanunoo', # 1720..173F
|
||||||
|
'Buhid', # 1740..175F
|
||||||
|
'Tagbanwa', # 1760..177F
|
||||||
|
'Khmer', # 1780..17FF
|
||||||
|
'Mongolian', # 1800..18AF
|
||||||
|
'Unified Canadian Aboriginal Syllabics Extended', # 18B0..18FF
|
||||||
|
'Limbu', # 1900..194F
|
||||||
|
'Tai Le', # 1950..197F
|
||||||
|
'New Tai Lue', # 1980..19DF
|
||||||
|
'Khmer Symbols', # 19E0..19FF
|
||||||
|
'Buginese', # 1A00..1A1F
|
||||||
|
'Tai Tham', # 1A20..1AAF
|
||||||
|
'Combining Diacritical Marks Extended', # 1AB0..1AFF
|
||||||
|
'Balinese', # 1B00..1B7F
|
||||||
|
'Sundanese', # 1B80..1BBF
|
||||||
|
'Batak', # 1BC0..1BFF
|
||||||
|
'Lepcha', # 1C00..1C4F
|
||||||
|
'Ol Chiki', # 1C50..1C7F
|
||||||
|
'Cyrillic Extended-C', # 1C80..1C8F
|
||||||
|
'No_Block', # 1C90..1CBF
|
||||||
|
'Sundanese Supplement', # 1CC0..1CCF
|
||||||
|
'Vedic Extensions', # 1CD0..1CFF
|
||||||
|
'Phonetic Extensions', # 1D00..1D7F
|
||||||
|
'Phonetic Extensions Supplement', # 1D80..1DBF
|
||||||
|
'Combining Diacritical Marks Supplement', # 1DC0..1DFF
|
||||||
|
'Latin Extended Additional', # 1E00..1EFF
|
||||||
|
'Greek Extended', # 1F00..1FFF
|
||||||
|
'General Punctuation', # 2000..206F
|
||||||
|
'Superscripts and Subscripts', # 2070..209F
|
||||||
|
'Currency Symbols', # 20A0..20CF
|
||||||
|
'Combining Diacritical Marks for Symbols', # 20D0..20FF
|
||||||
|
'Letterlike Symbols', # 2100..214F
|
||||||
|
'Number Forms', # 2150..218F
|
||||||
|
'Arrows', # 2190..21FF
|
||||||
|
'Mathematical Operators', # 2200..22FF
|
||||||
|
'Miscellaneous Technical', # 2300..23FF
|
||||||
|
'Control Pictures', # 2400..243F
|
||||||
|
'Optical Character Recognition', # 2440..245F
|
||||||
|
'Enclosed Alphanumerics', # 2460..24FF
|
||||||
|
'Box Drawing', # 2500..257F
|
||||||
|
'Block Elements', # 2580..259F
|
||||||
|
'Geometric Shapes', # 25A0..25FF
|
||||||
|
'Miscellaneous Symbols', # 2600..26FF
|
||||||
|
'Dingbats', # 2700..27BF
|
||||||
|
'Miscellaneous Mathematical Symbols-A', # 27C0..27EF
|
||||||
|
'Supplemental Arrows-A', # 27F0..27FF
|
||||||
|
'Braille Patterns', # 2800..28FF
|
||||||
|
'Supplemental Arrows-B', # 2900..297F
|
||||||
|
'Miscellaneous Mathematical Symbols-B', # 2980..29FF
|
||||||
|
'Supplemental Mathematical Operators', # 2A00..2AFF
|
||||||
|
'Miscellaneous Symbols and Arrows', # 2B00..2BFF
|
||||||
|
'Glagolitic', # 2C00..2C5F
|
||||||
|
'Latin Extended-C', # 2C60..2C7F
|
||||||
|
'Coptic', # 2C80..2CFF
|
||||||
|
'Georgian Supplement', # 2D00..2D2F
|
||||||
|
'Tifinagh', # 2D30..2D7F
|
||||||
|
'Ethiopic Extended', # 2D80..2DDF
|
||||||
|
'Cyrillic Extended-A', # 2DE0..2DFF
|
||||||
|
'Supplemental Punctuation', # 2E00..2E7F
|
||||||
|
'CJK Radicals Supplement', # 2E80..2EFF
|
||||||
|
'Kangxi Radicals', # 2F00..2FDF
|
||||||
|
'No_Block', # 2FE0..2FEF
|
||||||
|
'Ideographic Description Characters', # 2FF0..2FFF
|
||||||
|
'CJK Symbols and Punctuation', # 3000..303F
|
||||||
|
'Hiragana', # 3040..309F
|
||||||
|
'Katakana', # 30A0..30FF
|
||||||
|
'Bopomofo', # 3100..312F
|
||||||
|
'Hangul Compatibility Jamo', # 3130..318F
|
||||||
|
'Kanbun', # 3190..319F
|
||||||
|
'Bopomofo Extended', # 31A0..31BF
|
||||||
|
'CJK Strokes', # 31C0..31EF
|
||||||
|
'Katakana Phonetic Extensions', # 31F0..31FF
|
||||||
|
'Enclosed CJK Letters and Months', # 3200..32FF
|
||||||
|
'CJK Compatibility', # 3300..33FF
|
||||||
|
'CJK Unified Ideographs Extension A', # 3400..4DBF
|
||||||
|
'Yijing Hexagram Symbols', # 4DC0..4DFF
|
||||||
|
'CJK Unified Ideographs', # 4E00..9FFF
|
||||||
|
'Yi Syllables', # A000..A48F
|
||||||
|
'Yi Radicals', # A490..A4CF
|
||||||
|
'Lisu', # A4D0..A4FF
|
||||||
|
'Vai', # A500..A63F
|
||||||
|
'Cyrillic Extended-B', # A640..A69F
|
||||||
|
'Bamum', # A6A0..A6FF
|
||||||
|
'Modifier Tone Letters', # A700..A71F
|
||||||
|
'Latin Extended-D', # A720..A7FF
|
||||||
|
'Syloti Nagri', # A800..A82F
|
||||||
|
'Common Indic Number Forms', # A830..A83F
|
||||||
|
'Phags-pa', # A840..A87F
|
||||||
|
'Saurashtra', # A880..A8DF
|
||||||
|
'Devanagari Extended', # A8E0..A8FF
|
||||||
|
'Kayah Li', # A900..A92F
|
||||||
|
'Rejang', # A930..A95F
|
||||||
|
'Hangul Jamo Extended-A', # A960..A97F
|
||||||
|
'Javanese', # A980..A9DF
|
||||||
|
'Myanmar Extended-B', # A9E0..A9FF
|
||||||
|
'Cham', # AA00..AA5F
|
||||||
|
'Myanmar Extended-A', # AA60..AA7F
|
||||||
|
'Tai Viet', # AA80..AADF
|
||||||
|
'Meetei Mayek Extensions', # AAE0..AAFF
|
||||||
|
'Ethiopic Extended-A', # AB00..AB2F
|
||||||
|
'Latin Extended-E', # AB30..AB6F
|
||||||
|
'Cherokee Supplement', # AB70..ABBF
|
||||||
|
'Meetei Mayek', # ABC0..ABFF
|
||||||
|
'Hangul Syllables', # AC00..D7AF
|
||||||
|
'Hangul Jamo Extended-B', # D7B0..D7FF
|
||||||
|
'High Surrogates', # D800..DB7F
|
||||||
|
'High Private Use Surrogates', # DB80..DBFF
|
||||||
|
'Low Surrogates', # DC00..DFFF
|
||||||
|
'Private Use Area', # E000..F8FF
|
||||||
|
'CJK Compatibility Ideographs', # F900..FAFF
|
||||||
|
'Alphabetic Presentation Forms', # FB00..FB4F
|
||||||
|
'Arabic Presentation Forms-A', # FB50..FDFF
|
||||||
|
'Variation Selectors', # FE00..FE0F
|
||||||
|
'Vertical Forms', # FE10..FE1F
|
||||||
|
'Combining Half Marks', # FE20..FE2F
|
||||||
|
'CJK Compatibility Forms', # FE30..FE4F
|
||||||
|
'Small Form Variants', # FE50..FE6F
|
||||||
|
'Arabic Presentation Forms-B', # FE70..FEFF
|
||||||
|
'Halfwidth and Fullwidth Forms', # FF00..FFEF
|
||||||
|
'Specials', # FFF0..FFFF
|
||||||
|
'Linear B Syllabary', # 10000..1007F
|
||||||
|
'Linear B Ideograms', # 10080..100FF
|
||||||
|
'Aegean Numbers', # 10100..1013F
|
||||||
|
'Ancient Greek Numbers', # 10140..1018F
|
||||||
|
'Ancient Symbols', # 10190..101CF
|
||||||
|
'Phaistos Disc', # 101D0..101FF
|
||||||
|
'No_Block', # 10200..1027F
|
||||||
|
'Lycian', # 10280..1029F
|
||||||
|
'Carian', # 102A0..102DF
|
||||||
|
'Coptic Epact Numbers', # 102E0..102FF
|
||||||
|
'Old Italic', # 10300..1032F
|
||||||
|
'Gothic', # 10330..1034F
|
||||||
|
'Old Permic', # 10350..1037F
|
||||||
|
'Ugaritic', # 10380..1039F
|
||||||
|
'Old Persian', # 103A0..103DF
|
||||||
|
'No_Block', # 103E0..103FF
|
||||||
|
'Deseret', # 10400..1044F
|
||||||
|
'Shavian', # 10450..1047F
|
||||||
|
'Osmanya', # 10480..104AF
|
||||||
|
'Osage', # 104B0..104FF
|
||||||
|
'Elbasan', # 10500..1052F
|
||||||
|
'Caucasian Albanian', # 10530..1056F
|
||||||
|
'No_Block', # 10570..105FF
|
||||||
|
'Linear A', # 10600..1077F
|
||||||
|
'No_Block', # 10780..107FF
|
||||||
|
'Cypriot Syllabary', # 10800..1083F
|
||||||
|
'Imperial Aramaic', # 10840..1085F
|
||||||
|
'Palmyrene', # 10860..1087F
|
||||||
|
'Nabataean', # 10880..108AF
|
||||||
|
'No_Block', # 108B0..108DF
|
||||||
|
'Hatran', # 108E0..108FF
|
||||||
|
'Phoenician', # 10900..1091F
|
||||||
|
'Lydian', # 10920..1093F
|
||||||
|
'No_Block', # 10940..1097F
|
||||||
|
'Meroitic Hieroglyphs', # 10980..1099F
|
||||||
|
'Meroitic Cursive', # 109A0..109FF
|
||||||
|
'Kharoshthi', # 10A00..10A5F
|
||||||
|
'Old South Arabian', # 10A60..10A7F
|
||||||
|
'Old North Arabian', # 10A80..10A9F
|
||||||
|
'No_Block', # 10AA0..10ABF
|
||||||
|
'Manichaean', # 10AC0..10AFF
|
||||||
|
'Avestan', # 10B00..10B3F
|
||||||
|
'Inscriptional Parthian', # 10B40..10B5F
|
||||||
|
'Inscriptional Pahlavi', # 10B60..10B7F
|
||||||
|
'Psalter Pahlavi', # 10B80..10BAF
|
||||||
|
'No_Block', # 10BB0..10BFF
|
||||||
|
'Old Turkic', # 10C00..10C4F
|
||||||
|
'No_Block', # 10C50..10C7F
|
||||||
|
'Old Hungarian', # 10C80..10CFF
|
||||||
|
'No_Block', # 10D00..10E5F
|
||||||
|
'Rumi Numeral Symbols', # 10E60..10E7F
|
||||||
|
'No_Block', # 10E80..10FFF
|
||||||
|
'Brahmi', # 11000..1107F
|
||||||
|
'Kaithi', # 11080..110CF
|
||||||
|
'Sora Sompeng', # 110D0..110FF
|
||||||
|
'Chakma', # 11100..1114F
|
||||||
|
'Mahajani', # 11150..1117F
|
||||||
|
'Sharada', # 11180..111DF
|
||||||
|
'Sinhala Archaic Numbers', # 111E0..111FF
|
||||||
|
'Khojki', # 11200..1124F
|
||||||
|
'No_Block', # 11250..1127F
|
||||||
|
'Multani', # 11280..112AF
|
||||||
|
'Khudawadi', # 112B0..112FF
|
||||||
|
'Grantha', # 11300..1137F
|
||||||
|
'No_Block', # 11380..113FF
|
||||||
|
'Newa', # 11400..1147F
|
||||||
|
'Tirhuta', # 11480..114DF
|
||||||
|
'No_Block', # 114E0..1157F
|
||||||
|
'Siddham', # 11580..115FF
|
||||||
|
'Modi', # 11600..1165F
|
||||||
|
'Mongolian Supplement', # 11660..1167F
|
||||||
|
'Takri', # 11680..116CF
|
||||||
|
'No_Block', # 116D0..116FF
|
||||||
|
'Ahom', # 11700..1173F
|
||||||
|
'No_Block', # 11740..1189F
|
||||||
|
'Warang Citi', # 118A0..118FF
|
||||||
|
'No_Block', # 11900..119FF
|
||||||
|
'Zanabazar Square', # 11A00..11A4F
|
||||||
|
'Soyombo', # 11A50..11AAF
|
||||||
|
'No_Block', # 11AB0..11ABF
|
||||||
|
'Pau Cin Hau', # 11AC0..11AFF
|
||||||
|
'No_Block', # 11B00..11BFF
|
||||||
|
'Bhaiksuki', # 11C00..11C6F
|
||||||
|
'Marchen', # 11C70..11CBF
|
||||||
|
'No_Block', # 11CC0..11CFF
|
||||||
|
'Masaram Gondi', # 11D00..11D5F
|
||||||
|
'No_Block', # 11D60..11FFF
|
||||||
|
'Cuneiform', # 12000..123FF
|
||||||
|
'Cuneiform Numbers and Punctuation', # 12400..1247F
|
||||||
|
'Early Dynastic Cuneiform', # 12480..1254F
|
||||||
|
'No_Block', # 12550..12FFF
|
||||||
|
'Egyptian Hieroglyphs', # 13000..1342F
|
||||||
|
'No_Block', # 13430..143FF
|
||||||
|
'Anatolian Hieroglyphs', # 14400..1467F
|
||||||
|
'No_Block', # 14680..167FF
|
||||||
|
'Bamum Supplement', # 16800..16A3F
|
||||||
|
'Mro', # 16A40..16A6F
|
||||||
|
'No_Block', # 16A70..16ACF
|
||||||
|
'Bassa Vah', # 16AD0..16AFF
|
||||||
|
'Pahawh Hmong', # 16B00..16B8F
|
||||||
|
'No_Block', # 16B90..16EFF
|
||||||
|
'Miao', # 16F00..16F9F
|
||||||
|
'No_Block', # 16FA0..16FDF
|
||||||
|
'Ideographic Symbols and Punctuation', # 16FE0..16FFF
|
||||||
|
'Tangut', # 17000..187FF
|
||||||
|
'Tangut Components', # 18800..18AFF
|
||||||
|
'No_Block', # 18B00..1AFFF
|
||||||
|
'Kana Supplement', # 1B000..1B0FF
|
||||||
|
'Kana Extended-A', # 1B100..1B12F
|
||||||
|
'No_Block', # 1B130..1B16F
|
||||||
|
'Nushu', # 1B170..1B2FF
|
||||||
|
'No_Block', # 1B300..1BBFF
|
||||||
|
'Duployan', # 1BC00..1BC9F
|
||||||
|
'Shorthand Format Controls', # 1BCA0..1BCAF
|
||||||
|
'No_Block', # 1BCB0..1CFFF
|
||||||
|
'Byzantine Musical Symbols', # 1D000..1D0FF
|
||||||
|
'Musical Symbols', # 1D100..1D1FF
|
||||||
|
'Ancient Greek Musical Notation', # 1D200..1D24F
|
||||||
|
'No_Block', # 1D250..1D2FF
|
||||||
|
'Tai Xuan Jing Symbols', # 1D300..1D35F
|
||||||
|
'Counting Rod Numerals', # 1D360..1D37F
|
||||||
|
'No_Block', # 1D380..1D3FF
|
||||||
|
'Mathematical Alphanumeric Symbols', # 1D400..1D7FF
|
||||||
|
'Sutton SignWriting', # 1D800..1DAAF
|
||||||
|
'No_Block', # 1DAB0..1DFFF
|
||||||
|
'Glagolitic Supplement', # 1E000..1E02F
|
||||||
|
'No_Block', # 1E030..1E7FF
|
||||||
|
'Mende Kikakui', # 1E800..1E8DF
|
||||||
|
'No_Block', # 1E8E0..1E8FF
|
||||||
|
'Adlam', # 1E900..1E95F
|
||||||
|
'No_Block', # 1E960..1EDFF
|
||||||
|
'Arabic Mathematical Alphabetic Symbols', # 1EE00..1EEFF
|
||||||
|
'No_Block', # 1EF00..1EFFF
|
||||||
|
'Mahjong Tiles', # 1F000..1F02F
|
||||||
|
'Domino Tiles', # 1F030..1F09F
|
||||||
|
'Playing Cards', # 1F0A0..1F0FF
|
||||||
|
'Enclosed Alphanumeric Supplement', # 1F100..1F1FF
|
||||||
|
'Enclosed Ideographic Supplement', # 1F200..1F2FF
|
||||||
|
'Miscellaneous Symbols and Pictographs', # 1F300..1F5FF
|
||||||
|
'Emoticons', # 1F600..1F64F
|
||||||
|
'Ornamental Dingbats', # 1F650..1F67F
|
||||||
|
'Transport and Map Symbols', # 1F680..1F6FF
|
||||||
|
'Alchemical Symbols', # 1F700..1F77F
|
||||||
|
'Geometric Shapes Extended', # 1F780..1F7FF
|
||||||
|
'Supplemental Arrows-C', # 1F800..1F8FF
|
||||||
|
'Supplemental Symbols and Pictographs', # 1F900..1F9FF
|
||||||
|
'No_Block', # 1FA00..1FFFF
|
||||||
|
'CJK Unified Ideographs Extension B', # 20000..2A6DF
|
||||||
|
'No_Block', # 2A6E0..2A6FF
|
||||||
|
'CJK Unified Ideographs Extension C', # 2A700..2B73F
|
||||||
|
'CJK Unified Ideographs Extension D', # 2B740..2B81F
|
||||||
|
'CJK Unified Ideographs Extension E', # 2B820..2CEAF
|
||||||
|
'CJK Unified Ideographs Extension F', # 2CEB0..2EBEF
|
||||||
|
'No_Block', # 2EBF0..2F7FF
|
||||||
|
'CJK Compatibility Ideographs Supplement', # 2F800..2FA1F
|
||||||
|
'No_Block', # 2FA20..DFFFF
|
||||||
|
'Tags', # E0000..E007F
|
||||||
|
'No_Block', # E0080..E00FF
|
||||||
|
'Variation Selectors Supplement', # E0100..E01EF
|
||||||
|
'No_Block', # E01F0..EFFFF
|
||||||
|
'Supplementary Private Use Area-A', # F0000..FFFFF
|
||||||
|
'Supplementary Private Use Area-B', # 100000..10FFFF
|
||||||
|
]
|
389
Lib/fontTools/unicodedata/ScriptExtensions.py
Normal file
389
Lib/fontTools/unicodedata/ScriptExtensions.py
Normal file
@ -0,0 +1,389 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# NOTE: This file was auto-generated with MetaTools/buildUCD.py.
|
||||||
|
# Source: https://unicode.org/Public/UNIDATA/ScriptExtensions.txt
|
||||||
|
# License: http://unicode.org/copyright.html#License
|
||||||
|
#
|
||||||
|
# ScriptExtensions-10.0.0.txt
|
||||||
|
# Date: 2017-05-31, 01:07:00 GMT [RP]
|
||||||
|
# © 2017 Unicode®, Inc.
|
||||||
|
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||||
|
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||||
|
#
|
||||||
|
# Unicode Character Database
|
||||||
|
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||||
|
#
|
||||||
|
# The Script_Extensions property indicates which characters are commonly used
|
||||||
|
# with more than one script, but with a limited number of scripts.
|
||||||
|
# For each code point, there is one or more property values. Each such value is a Script property value.
|
||||||
|
# For more information, see:
|
||||||
|
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
|
||||||
|
# Especially the sections:
|
||||||
|
# http://www.unicode.org/reports/tr24/#Assignment_Script_Values
|
||||||
|
# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
|
||||||
|
#
|
||||||
|
# Each Script_Extensions value in this file consists of a set
|
||||||
|
# of one or more abbreviated Script property values. The ordering of the
|
||||||
|
# values in that set is not material, but for stability in presentation
|
||||||
|
# it is given here as alphabetical.
|
||||||
|
#
|
||||||
|
# The Script_Extensions values are presented in sorted order in the file.
|
||||||
|
# They are sorted first by the number of Script property values in their sets,
|
||||||
|
# and then alphabetically by first differing Script property value.
|
||||||
|
#
|
||||||
|
# Following each distinct Script_Extensions value is the list of code
|
||||||
|
# points associated with that value, listed in code point order.
|
||||||
|
#
|
||||||
|
# All code points not explicitly listed for Script_Extensions
|
||||||
|
# have as their value the corresponding Script property value
|
||||||
|
#
|
||||||
|
# @missing: 0000..10FFFF; <script>
|
||||||
|
|
||||||
|
|
||||||
|
RANGES = [
|
||||||
|
0x0000, # .. 0x0341 ; None
|
||||||
|
0x0342, # .. 0x0342 ; {'Grek'}
|
||||||
|
0x0343, # .. 0x0344 ; None
|
||||||
|
0x0345, # .. 0x0345 ; {'Grek'}
|
||||||
|
0x0346, # .. 0x0362 ; None
|
||||||
|
0x0363, # .. 0x036F ; {'Latn'}
|
||||||
|
0x0370, # .. 0x0482 ; None
|
||||||
|
0x0483, # .. 0x0483 ; {'Cyrl', 'Perm'}
|
||||||
|
0x0484, # .. 0x0484 ; {'Cyrl', 'Glag'}
|
||||||
|
0x0485, # .. 0x0486 ; {'Cyrl', 'Latn'}
|
||||||
|
0x0487, # .. 0x0487 ; {'Cyrl', 'Glag'}
|
||||||
|
0x0488, # .. 0x0588 ; None
|
||||||
|
0x0589, # .. 0x0589 ; {'Armn', 'Geor'}
|
||||||
|
0x058A, # .. 0x060B ; None
|
||||||
|
0x060C, # .. 0x060C ; {'Arab', 'Syrc', 'Thaa'}
|
||||||
|
0x060D, # .. 0x061A ; None
|
||||||
|
0x061B, # .. 0x061C ; {'Arab', 'Syrc', 'Thaa'}
|
||||||
|
0x061D, # .. 0x061E ; None
|
||||||
|
0x061F, # .. 0x061F ; {'Arab', 'Syrc', 'Thaa'}
|
||||||
|
0x0620, # .. 0x063F ; None
|
||||||
|
0x0640, # .. 0x0640 ; {'Adlm', 'Arab', 'Mand', 'Mani', 'Phlp', 'Syrc'}
|
||||||
|
0x0641, # .. 0x064A ; None
|
||||||
|
0x064B, # .. 0x0655 ; {'Arab', 'Syrc'}
|
||||||
|
0x0656, # .. 0x065F ; None
|
||||||
|
0x0660, # .. 0x0669 ; {'Arab', 'Thaa'}
|
||||||
|
0x066A, # .. 0x066F ; None
|
||||||
|
0x0670, # .. 0x0670 ; {'Arab', 'Syrc'}
|
||||||
|
0x0671, # .. 0x0950 ; None
|
||||||
|
0x0951, # .. 0x0951 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Shrd', 'Taml', 'Telu'}
|
||||||
|
0x0952, # .. 0x0952 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Taml', 'Telu'}
|
||||||
|
0x0953, # .. 0x0963 ; None
|
||||||
|
0x0964, # .. 0x0964 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Mahj', 'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}
|
||||||
|
0x0965, # .. 0x0965 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Limb', 'Mahj', 'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}
|
||||||
|
0x0966, # .. 0x096F ; {'Deva', 'Kthi', 'Mahj'}
|
||||||
|
0x0970, # .. 0x09E5 ; None
|
||||||
|
0x09E6, # .. 0x09EF ; {'Beng', 'Cakm', 'Sylo'}
|
||||||
|
0x09F0, # .. 0x0A65 ; None
|
||||||
|
0x0A66, # .. 0x0A6F ; {'Guru', 'Mult'}
|
||||||
|
0x0A70, # .. 0x0AE5 ; None
|
||||||
|
0x0AE6, # .. 0x0AEF ; {'Gujr', 'Khoj'}
|
||||||
|
0x0AF0, # .. 0x0BA9 ; None
|
||||||
|
0x0BAA, # .. 0x0BAA ; {'Gran', 'Taml'}
|
||||||
|
0x0BAB, # .. 0x0BB4 ; None
|
||||||
|
0x0BB5, # .. 0x0BB5 ; {'Gran', 'Taml'}
|
||||||
|
0x0BB6, # .. 0x0BE5 ; None
|
||||||
|
0x0BE6, # .. 0x0BF2 ; {'Gran', 'Taml'}
|
||||||
|
0x0BF3, # .. 0x103F ; None
|
||||||
|
0x1040, # .. 0x1049 ; {'Cakm', 'Mymr', 'Tale'}
|
||||||
|
0x104A, # .. 0x10FA ; None
|
||||||
|
0x10FB, # .. 0x10FB ; {'Geor', 'Latn'}
|
||||||
|
0x10FC, # .. 0x1734 ; None
|
||||||
|
0x1735, # .. 0x1736 ; {'Buhd', 'Hano', 'Tagb', 'Tglg'}
|
||||||
|
0x1737, # .. 0x1801 ; None
|
||||||
|
0x1802, # .. 0x1803 ; {'Mong', 'Phag'}
|
||||||
|
0x1804, # .. 0x1804 ; None
|
||||||
|
0x1805, # .. 0x1805 ; {'Mong', 'Phag'}
|
||||||
|
0x1806, # .. 0x1CCF ; None
|
||||||
|
0x1CD0, # .. 0x1CD0 ; {'Deva', 'Gran'}
|
||||||
|
0x1CD1, # .. 0x1CD1 ; {'Deva'}
|
||||||
|
0x1CD2, # .. 0x1CD3 ; {'Deva', 'Gran'}
|
||||||
|
0x1CD4, # .. 0x1CD6 ; {'Deva'}
|
||||||
|
0x1CD7, # .. 0x1CD7 ; {'Deva', 'Shrd'}
|
||||||
|
0x1CD8, # .. 0x1CD8 ; {'Deva'}
|
||||||
|
0x1CD9, # .. 0x1CD9 ; {'Deva', 'Shrd'}
|
||||||
|
0x1CDA, # .. 0x1CDA ; {'Deva', 'Knda', 'Mlym', 'Taml', 'Telu'}
|
||||||
|
0x1CDB, # .. 0x1CDB ; {'Deva'}
|
||||||
|
0x1CDC, # .. 0x1CDD ; {'Deva', 'Shrd'}
|
||||||
|
0x1CDE, # .. 0x1CDF ; {'Deva'}
|
||||||
|
0x1CE0, # .. 0x1CE0 ; {'Deva', 'Shrd'}
|
||||||
|
0x1CE1, # .. 0x1CF1 ; {'Deva'}
|
||||||
|
0x1CF2, # .. 0x1CF4 ; {'Deva', 'Gran'}
|
||||||
|
0x1CF5, # .. 0x1CF5 ; {'Deva', 'Knda'}
|
||||||
|
0x1CF6, # .. 0x1CF6 ; {'Deva'}
|
||||||
|
0x1CF7, # .. 0x1CF7 ; {'Beng'}
|
||||||
|
0x1CF8, # .. 0x1CF9 ; {'Deva', 'Gran'}
|
||||||
|
0x1CFA, # .. 0x1DBF ; None
|
||||||
|
0x1DC0, # .. 0x1DC1 ; {'Grek'}
|
||||||
|
0x1DC2, # .. 0x20EF ; None
|
||||||
|
0x20F0, # .. 0x20F0 ; {'Deva', 'Gran', 'Latn'}
|
||||||
|
0x20F1, # .. 0x2E42 ; None
|
||||||
|
0x2E43, # .. 0x2E43 ; {'Cyrl', 'Glag'}
|
||||||
|
0x2E44, # .. 0x3000 ; None
|
||||||
|
0x3001, # .. 0x3002 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||||
|
0x3003, # .. 0x3003 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||||
|
0x3004, # .. 0x3005 ; None
|
||||||
|
0x3006, # .. 0x3006 ; {'Hani'}
|
||||||
|
0x3007, # .. 0x3007 ; None
|
||||||
|
0x3008, # .. 0x3011 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||||
|
0x3012, # .. 0x3012 ; None
|
||||||
|
0x3013, # .. 0x3013 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||||
|
0x3014, # .. 0x301B ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||||
|
0x301C, # .. 0x301F ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||||
|
0x3020, # .. 0x3029 ; None
|
||||||
|
0x302A, # .. 0x302D ; {'Bopo', 'Hani'}
|
||||||
|
0x302E, # .. 0x302F ; None
|
||||||
|
0x3030, # .. 0x3030 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||||
|
0x3031, # .. 0x3035 ; {'Hira', 'Kana'}
|
||||||
|
0x3036, # .. 0x3036 ; None
|
||||||
|
0x3037, # .. 0x3037 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||||
|
0x3038, # .. 0x303B ; None
|
||||||
|
0x303C, # .. 0x303D ; {'Hani', 'Hira', 'Kana'}
|
||||||
|
0x303E, # .. 0x303F ; {'Hani'}
|
||||||
|
0x3040, # .. 0x3098 ; None
|
||||||
|
0x3099, # .. 0x309C ; {'Hira', 'Kana'}
|
||||||
|
0x309D, # .. 0x309F ; None
|
||||||
|
0x30A0, # .. 0x30A0 ; {'Hira', 'Kana'}
|
||||||
|
0x30A1, # .. 0x30FA ; None
|
||||||
|
0x30FB, # .. 0x30FB ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||||
|
0x30FC, # .. 0x30FC ; {'Hira', 'Kana'}
|
||||||
|
0x30FD, # .. 0x318F ; None
|
||||||
|
0x3190, # .. 0x319F ; {'Hani'}
|
||||||
|
0x31A0, # .. 0x31BF ; None
|
||||||
|
0x31C0, # .. 0x31E3 ; {'Hani'}
|
||||||
|
0x31E4, # .. 0x321F ; None
|
||||||
|
0x3220, # .. 0x3247 ; {'Hani'}
|
||||||
|
0x3248, # .. 0x327F ; None
|
||||||
|
0x3280, # .. 0x32B0 ; {'Hani'}
|
||||||
|
0x32B1, # .. 0x32BF ; None
|
||||||
|
0x32C0, # .. 0x32CB ; {'Hani'}
|
||||||
|
0x32CC, # .. 0x3357 ; None
|
||||||
|
0x3358, # .. 0x3370 ; {'Hani'}
|
||||||
|
0x3371, # .. 0x337A ; None
|
||||||
|
0x337B, # .. 0x337F ; {'Hani'}
|
||||||
|
0x3380, # .. 0x33DF ; None
|
||||||
|
0x33E0, # .. 0x33FE ; {'Hani'}
|
||||||
|
0x33FF, # .. 0xA66E ; None
|
||||||
|
0xA66F, # .. 0xA66F ; {'Cyrl', 'Glag'}
|
||||||
|
0xA670, # .. 0xA82F ; None
|
||||||
|
0xA830, # .. 0xA835 ; {'Deva', 'Gujr', 'Guru', 'Knda', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}
|
||||||
|
0xA836, # .. 0xA839 ; {'Deva', 'Gujr', 'Guru', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}
|
||||||
|
0xA83A, # .. 0xA8F0 ; None
|
||||||
|
0xA8F1, # .. 0xA8F1 ; {'Beng', 'Deva'}
|
||||||
|
0xA8F2, # .. 0xA8F2 ; None
|
||||||
|
0xA8F3, # .. 0xA8F3 ; {'Deva', 'Taml'}
|
||||||
|
0xA8F4, # .. 0xA92D ; None
|
||||||
|
0xA92E, # .. 0xA92E ; {'Kali', 'Latn', 'Mymr'}
|
||||||
|
0xA92F, # .. 0xA9CE ; None
|
||||||
|
0xA9CF, # .. 0xA9CF ; {'Bugi', 'Java'}
|
||||||
|
0xA9D0, # .. 0xFDF1 ; None
|
||||||
|
0xFDF2, # .. 0xFDF2 ; {'Arab', 'Thaa'}
|
||||||
|
0xFDF3, # .. 0xFDFC ; None
|
||||||
|
0xFDFD, # .. 0xFDFD ; {'Arab', 'Thaa'}
|
||||||
|
0xFDFE, # .. 0xFE44 ; None
|
||||||
|
0xFE45, # .. 0xFE46 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||||
|
0xFE47, # .. 0xFF60 ; None
|
||||||
|
0xFF61, # .. 0xFF65 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||||
|
0xFF66, # .. 0xFF6F ; None
|
||||||
|
0xFF70, # .. 0xFF70 ; {'Hira', 'Kana'}
|
||||||
|
0xFF71, # .. 0xFF9D ; None
|
||||||
|
0xFF9E, # .. 0xFF9F ; {'Hira', 'Kana'}
|
||||||
|
0xFFA0, # .. 0x100FF ; None
|
||||||
|
0x10100, # .. 0x10102 ; {'Cprt', 'Linb'}
|
||||||
|
0x10103, # .. 0x10106 ; None
|
||||||
|
0x10107, # .. 0x10133 ; {'Cprt', 'Lina', 'Linb'}
|
||||||
|
0x10134, # .. 0x10136 ; None
|
||||||
|
0x10137, # .. 0x1013F ; {'Cprt', 'Linb'}
|
||||||
|
0x10140, # .. 0x102DF ; None
|
||||||
|
0x102E0, # .. 0x102FB ; {'Arab', 'Copt'}
|
||||||
|
0x102FC, # .. 0x11300 ; None
|
||||||
|
0x11301, # .. 0x11301 ; {'Gran', 'Taml'}
|
||||||
|
0x11302, # .. 0x11302 ; None
|
||||||
|
0x11303, # .. 0x11303 ; {'Gran', 'Taml'}
|
||||||
|
0x11304, # .. 0x1133B ; None
|
||||||
|
0x1133C, # .. 0x1133C ; {'Gran', 'Taml'}
|
||||||
|
0x1133D, # .. 0x1BC9F ; None
|
||||||
|
0x1BCA0, # .. 0x1BCA3 ; {'Dupl'}
|
||||||
|
0x1BCA4, # .. 0x1D35F ; None
|
||||||
|
0x1D360, # .. 0x1D371 ; {'Hani'}
|
||||||
|
0x1D372, # .. 0x1F24F ; None
|
||||||
|
0x1F250, # .. 0x1F251 ; {'Hani'}
|
||||||
|
0x1F252, # .. 0x10FFFF ; None
|
||||||
|
]
|
||||||
|
|
||||||
|
VALUES = [
|
||||||
|
None, # 0000..0341
|
||||||
|
{'Grek'}, # 0342..0342
|
||||||
|
None, # 0343..0344
|
||||||
|
{'Grek'}, # 0345..0345
|
||||||
|
None, # 0346..0362
|
||||||
|
{'Latn'}, # 0363..036F
|
||||||
|
None, # 0370..0482
|
||||||
|
{'Cyrl', 'Perm'}, # 0483..0483
|
||||||
|
{'Cyrl', 'Glag'}, # 0484..0484
|
||||||
|
{'Cyrl', 'Latn'}, # 0485..0486
|
||||||
|
{'Cyrl', 'Glag'}, # 0487..0487
|
||||||
|
None, # 0488..0588
|
||||||
|
{'Armn', 'Geor'}, # 0589..0589
|
||||||
|
None, # 058A..060B
|
||||||
|
{'Arab', 'Syrc', 'Thaa'}, # 060C..060C
|
||||||
|
None, # 060D..061A
|
||||||
|
{'Arab', 'Syrc', 'Thaa'}, # 061B..061C
|
||||||
|
None, # 061D..061E
|
||||||
|
{'Arab', 'Syrc', 'Thaa'}, # 061F..061F
|
||||||
|
None, # 0620..063F
|
||||||
|
{'Adlm', 'Arab', 'Mand', 'Mani', 'Phlp', 'Syrc'}, # 0640..0640
|
||||||
|
None, # 0641..064A
|
||||||
|
{'Arab', 'Syrc'}, # 064B..0655
|
||||||
|
None, # 0656..065F
|
||||||
|
{'Arab', 'Thaa'}, # 0660..0669
|
||||||
|
None, # 066A..066F
|
||||||
|
{'Arab', 'Syrc'}, # 0670..0670
|
||||||
|
None, # 0671..0950
|
||||||
|
{'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Shrd', 'Taml', 'Telu'}, # 0951..0951
|
||||||
|
{'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Taml', 'Telu'}, # 0952..0952
|
||||||
|
None, # 0953..0963
|
||||||
|
{'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Mahj', 'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}, # 0964..0964
|
||||||
|
{'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Limb', 'Mahj', 'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}, # 0965..0965
|
||||||
|
{'Deva', 'Kthi', 'Mahj'}, # 0966..096F
|
||||||
|
None, # 0970..09E5
|
||||||
|
{'Beng', 'Cakm', 'Sylo'}, # 09E6..09EF
|
||||||
|
None, # 09F0..0A65
|
||||||
|
{'Guru', 'Mult'}, # 0A66..0A6F
|
||||||
|
None, # 0A70..0AE5
|
||||||
|
{'Gujr', 'Khoj'}, # 0AE6..0AEF
|
||||||
|
None, # 0AF0..0BA9
|
||||||
|
{'Gran', 'Taml'}, # 0BAA..0BAA
|
||||||
|
None, # 0BAB..0BB4
|
||||||
|
{'Gran', 'Taml'}, # 0BB5..0BB5
|
||||||
|
None, # 0BB6..0BE5
|
||||||
|
{'Gran', 'Taml'}, # 0BE6..0BF2
|
||||||
|
None, # 0BF3..103F
|
||||||
|
{'Cakm', 'Mymr', 'Tale'}, # 1040..1049
|
||||||
|
None, # 104A..10FA
|
||||||
|
{'Geor', 'Latn'}, # 10FB..10FB
|
||||||
|
None, # 10FC..1734
|
||||||
|
{'Buhd', 'Hano', 'Tagb', 'Tglg'}, # 1735..1736
|
||||||
|
None, # 1737..1801
|
||||||
|
{'Mong', 'Phag'}, # 1802..1803
|
||||||
|
None, # 1804..1804
|
||||||
|
{'Mong', 'Phag'}, # 1805..1805
|
||||||
|
None, # 1806..1CCF
|
||||||
|
{'Deva', 'Gran'}, # 1CD0..1CD0
|
||||||
|
{'Deva'}, # 1CD1..1CD1
|
||||||
|
{'Deva', 'Gran'}, # 1CD2..1CD3
|
||||||
|
{'Deva'}, # 1CD4..1CD6
|
||||||
|
{'Deva', 'Shrd'}, # 1CD7..1CD7
|
||||||
|
{'Deva'}, # 1CD8..1CD8
|
||||||
|
{'Deva', 'Shrd'}, # 1CD9..1CD9
|
||||||
|
{'Deva', 'Knda', 'Mlym', 'Taml', 'Telu'}, # 1CDA..1CDA
|
||||||
|
{'Deva'}, # 1CDB..1CDB
|
||||||
|
{'Deva', 'Shrd'}, # 1CDC..1CDD
|
||||||
|
{'Deva'}, # 1CDE..1CDF
|
||||||
|
{'Deva', 'Shrd'}, # 1CE0..1CE0
|
||||||
|
{'Deva'}, # 1CE1..1CF1
|
||||||
|
{'Deva', 'Gran'}, # 1CF2..1CF4
|
||||||
|
{'Deva', 'Knda'}, # 1CF5..1CF5
|
||||||
|
{'Deva'}, # 1CF6..1CF6
|
||||||
|
{'Beng'}, # 1CF7..1CF7
|
||||||
|
{'Deva', 'Gran'}, # 1CF8..1CF9
|
||||||
|
None, # 1CFA..1DBF
|
||||||
|
{'Grek'}, # 1DC0..1DC1
|
||||||
|
None, # 1DC2..20EF
|
||||||
|
{'Deva', 'Gran', 'Latn'}, # 20F0..20F0
|
||||||
|
None, # 20F1..2E42
|
||||||
|
{'Cyrl', 'Glag'}, # 2E43..2E43
|
||||||
|
None, # 2E44..3000
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}, # 3001..3002
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}, # 3003..3003
|
||||||
|
None, # 3004..3005
|
||||||
|
{'Hani'}, # 3006..3006
|
||||||
|
None, # 3007..3007
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}, # 3008..3011
|
||||||
|
None, # 3012..3012
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}, # 3013..3013
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}, # 3014..301B
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}, # 301C..301F
|
||||||
|
None, # 3020..3029
|
||||||
|
{'Bopo', 'Hani'}, # 302A..302D
|
||||||
|
None, # 302E..302F
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}, # 3030..3030
|
||||||
|
{'Hira', 'Kana'}, # 3031..3035
|
||||||
|
None, # 3036..3036
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}, # 3037..3037
|
||||||
|
None, # 3038..303B
|
||||||
|
{'Hani', 'Hira', 'Kana'}, # 303C..303D
|
||||||
|
{'Hani'}, # 303E..303F
|
||||||
|
None, # 3040..3098
|
||||||
|
{'Hira', 'Kana'}, # 3099..309C
|
||||||
|
None, # 309D..309F
|
||||||
|
{'Hira', 'Kana'}, # 30A0..30A0
|
||||||
|
None, # 30A1..30FA
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}, # 30FB..30FB
|
||||||
|
{'Hira', 'Kana'}, # 30FC..30FC
|
||||||
|
None, # 30FD..318F
|
||||||
|
{'Hani'}, # 3190..319F
|
||||||
|
None, # 31A0..31BF
|
||||||
|
{'Hani'}, # 31C0..31E3
|
||||||
|
None, # 31E4..321F
|
||||||
|
{'Hani'}, # 3220..3247
|
||||||
|
None, # 3248..327F
|
||||||
|
{'Hani'}, # 3280..32B0
|
||||||
|
None, # 32B1..32BF
|
||||||
|
{'Hani'}, # 32C0..32CB
|
||||||
|
None, # 32CC..3357
|
||||||
|
{'Hani'}, # 3358..3370
|
||||||
|
None, # 3371..337A
|
||||||
|
{'Hani'}, # 337B..337F
|
||||||
|
None, # 3380..33DF
|
||||||
|
{'Hani'}, # 33E0..33FE
|
||||||
|
None, # 33FF..A66E
|
||||||
|
{'Cyrl', 'Glag'}, # A66F..A66F
|
||||||
|
None, # A670..A82F
|
||||||
|
{'Deva', 'Gujr', 'Guru', 'Knda', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}, # A830..A835
|
||||||
|
{'Deva', 'Gujr', 'Guru', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}, # A836..A839
|
||||||
|
None, # A83A..A8F0
|
||||||
|
{'Beng', 'Deva'}, # A8F1..A8F1
|
||||||
|
None, # A8F2..A8F2
|
||||||
|
{'Deva', 'Taml'}, # A8F3..A8F3
|
||||||
|
None, # A8F4..A92D
|
||||||
|
{'Kali', 'Latn', 'Mymr'}, # A92E..A92E
|
||||||
|
None, # A92F..A9CE
|
||||||
|
{'Bugi', 'Java'}, # A9CF..A9CF
|
||||||
|
None, # A9D0..FDF1
|
||||||
|
{'Arab', 'Thaa'}, # FDF2..FDF2
|
||||||
|
None, # FDF3..FDFC
|
||||||
|
{'Arab', 'Thaa'}, # FDFD..FDFD
|
||||||
|
None, # FDFE..FE44
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}, # FE45..FE46
|
||||||
|
None, # FE47..FF60
|
||||||
|
{'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}, # FF61..FF65
|
||||||
|
None, # FF66..FF6F
|
||||||
|
{'Hira', 'Kana'}, # FF70..FF70
|
||||||
|
None, # FF71..FF9D
|
||||||
|
{'Hira', 'Kana'}, # FF9E..FF9F
|
||||||
|
None, # FFA0..100FF
|
||||||
|
{'Cprt', 'Linb'}, # 10100..10102
|
||||||
|
None, # 10103..10106
|
||||||
|
{'Cprt', 'Lina', 'Linb'}, # 10107..10133
|
||||||
|
None, # 10134..10136
|
||||||
|
{'Cprt', 'Linb'}, # 10137..1013F
|
||||||
|
None, # 10140..102DF
|
||||||
|
{'Arab', 'Copt'}, # 102E0..102FB
|
||||||
|
None, # 102FC..11300
|
||||||
|
{'Gran', 'Taml'}, # 11301..11301
|
||||||
|
None, # 11302..11302
|
||||||
|
{'Gran', 'Taml'}, # 11303..11303
|
||||||
|
None, # 11304..1133B
|
||||||
|
{'Gran', 'Taml'}, # 1133C..1133C
|
||||||
|
None, # 1133D..1BC9F
|
||||||
|
{'Dupl'}, # 1BCA0..1BCA3
|
||||||
|
None, # 1BCA4..1D35F
|
||||||
|
{'Hani'}, # 1D360..1D371
|
||||||
|
None, # 1D372..1F24F
|
||||||
|
{'Hani'}, # 1F250..1F251
|
||||||
|
None, # 1F252..10FFFF
|
||||||
|
]
|
3055
Lib/fontTools/unicodedata/Scripts.py
Normal file
3055
Lib/fontTools/unicodedata/Scripts.py
Normal file
File diff suppressed because it is too large
Load Diff
100
Lib/fontTools/unicodedata/__init__.py
Normal file
100
Lib/fontTools/unicodedata/__init__.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
from __future__ import (
|
||||||
|
print_function, division, absolute_import, unicode_literals)
|
||||||
|
from fontTools.misc.py23 import *
|
||||||
|
|
||||||
|
from bisect import bisect_right
|
||||||
|
|
||||||
|
try:
|
||||||
|
# use unicodedata backport compatible with python2:
|
||||||
|
# https://github.com/mikekap/unicodedata2
|
||||||
|
from unicodedata2 import *
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
# fall back to built-in unicodedata (possibly outdated)
|
||||||
|
from unicodedata import *
|
||||||
|
|
||||||
|
from . import Blocks, Scripts, ScriptExtensions
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# names from built-in unicodedata module
|
||||||
|
"lookup",
|
||||||
|
"name",
|
||||||
|
"decimal",
|
||||||
|
"digit",
|
||||||
|
"numeric",
|
||||||
|
"category",
|
||||||
|
"bidirectional",
|
||||||
|
"combining",
|
||||||
|
"east_asian_width",
|
||||||
|
"mirrored",
|
||||||
|
"decomposition",
|
||||||
|
"normalize",
|
||||||
|
"unidata_version",
|
||||||
|
"ucd_3_2_0",
|
||||||
|
# additonal functions
|
||||||
|
"block",
|
||||||
|
"script",
|
||||||
|
"script_extension",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def script(char):
|
||||||
|
""" Return the script property assigned to the Unicode character 'char'
|
||||||
|
as string.
|
||||||
|
|
||||||
|
>>> script("a")
|
||||||
|
'Latin'
|
||||||
|
>>> script(",")
|
||||||
|
'Common'
|
||||||
|
>>> script(unichr(0x10FFFF))
|
||||||
|
'Unknown'
|
||||||
|
"""
|
||||||
|
code = byteord(char)
|
||||||
|
# 'bisect_right(a, x, lo=0, hi=len(a))' returns an insertion point which
|
||||||
|
# comes after (to the right of) any existing entries of x in a, and it
|
||||||
|
# partitions array a into two halves so that, for the left side
|
||||||
|
# all(val <= x for val in a[lo:i]), and for the right side
|
||||||
|
# all(val > x for val in a[i:hi]).
|
||||||
|
# Our 'SCRIPT_RANGES' is a sorted list of ranges (only their starting
|
||||||
|
# breakpoints); we want to use `bisect_right` to look up the range that
|
||||||
|
# contains the given codepoint: i.e. whose start is less than or equal
|
||||||
|
# to the codepoint. Thus, we subtract -1 from the index returned.
|
||||||
|
i = bisect_right(Scripts.RANGES, code)
|
||||||
|
return Scripts.VALUES[i-1]
|
||||||
|
|
||||||
|
|
||||||
|
def script_extension(char):
|
||||||
|
""" Return the script extension property assigned to the Unicode character
|
||||||
|
'char' as a set of string.
|
||||||
|
|
||||||
|
>>> script_extension("a") == {'Latin'}
|
||||||
|
True
|
||||||
|
>>> script_extension(unichr(0x060C)) == {'Arab', 'Syrc', 'Thaa'}
|
||||||
|
True
|
||||||
|
>>> script_extension(unichr(0x10FFFF)) == {'Unknown'}
|
||||||
|
True
|
||||||
|
"""
|
||||||
|
code = byteord(char)
|
||||||
|
i = bisect_right(ScriptExtensions.RANGES, code)
|
||||||
|
value = ScriptExtensions.VALUES[i-1]
|
||||||
|
if value is None:
|
||||||
|
# code points not explicitly listed for Script Extensions
|
||||||
|
# have as their value the corresponding Script property value
|
||||||
|
return {script(char)}
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def block(char):
|
||||||
|
""" Return the block property assigned to the Unicode character 'char'
|
||||||
|
as a string.
|
||||||
|
|
||||||
|
>>> block("a")
|
||||||
|
'Basic Latin'
|
||||||
|
>>> block(unichr(0x060C))
|
||||||
|
'Arabic'
|
||||||
|
>>> block(unichr(0xEFFFF))
|
||||||
|
'No_Block'
|
||||||
|
"""
|
||||||
|
code = byteord(char)
|
||||||
|
i = bisect_right(Blocks.RANGES, code)
|
||||||
|
return Blocks.VALUES[i-1]
|
211
MetaTools/buildUCD.py
Executable file
211
MetaTools/buildUCD.py
Executable file
@ -0,0 +1,211 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""
|
||||||
|
Tools to parse data files from the Unicode Character Database.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import print_function, absolute_import, division
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from fontTools.misc.py23 import *
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urllib.request import urlopen
|
||||||
|
except ImportError:
|
||||||
|
from urllib2 import urlopen
|
||||||
|
from contextlib import closing, contextmanager
|
||||||
|
import re
|
||||||
|
from codecs import iterdecode
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import abspath, dirname, join as pjoin, pardir, sep
|
||||||
|
|
||||||
|
|
||||||
|
UNIDATA_URL = "https://unicode.org/Public/UNIDATA/"
|
||||||
|
UNIDATA_LICENSE_URL = "http://unicode.org/copyright.html#License"
|
||||||
|
|
||||||
|
# by default save output files to ../Lib/fontTools/unicodedata/
|
||||||
|
UNIDATA_PATH = pjoin(abspath(dirname(__file__)), pardir,
|
||||||
|
"Lib", "fontTools", "unicodedata") + sep
|
||||||
|
|
||||||
|
SRC_ENCODING = "# -*- coding: utf-8 -*-\n"
|
||||||
|
|
||||||
|
NOTICE = "# NOTE: This file was auto-generated with MetaTools/buildUCD.py.\n"
|
||||||
|
|
||||||
|
MAX_UNICODE = 0x10FFFF
|
||||||
|
|
||||||
|
log = logging.getLogger()
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def open_unidata_file(filename):
|
||||||
|
"""Open a text file from https://unicode.org/Public/UNIDATA/"""
|
||||||
|
url = UNIDATA_URL + filename
|
||||||
|
with closing(urlopen(url)) as response:
|
||||||
|
yield iterdecode(response, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_unidata_header(infile):
|
||||||
|
"""Read the top header of data files, until the first line
|
||||||
|
that does not start with '#'.
|
||||||
|
"""
|
||||||
|
header = []
|
||||||
|
line = next(infile)
|
||||||
|
while line.startswith("#"):
|
||||||
|
header.append(line)
|
||||||
|
line = next(infile)
|
||||||
|
return "".join(header)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_range_properties(infile, default=None, is_set=False):
|
||||||
|
"""Parse a Unicode data file containing a column with one character or
|
||||||
|
a range of characters, and another column containing a property value
|
||||||
|
separated by a semicolon. Comments after '#' are ignored.
|
||||||
|
|
||||||
|
If the ranges defined in the data file are not continuous, assign the
|
||||||
|
'default' property to the unassigned codepoints.
|
||||||
|
|
||||||
|
Return a list of (start, end, property_name) tuples.
|
||||||
|
"""
|
||||||
|
ranges = []
|
||||||
|
line_regex = re.compile(
|
||||||
|
r"^"
|
||||||
|
r"([0-9A-F]{4,6})" # first character code
|
||||||
|
r"(?:\.\.([0-9A-F]{4,6}))?" # optional second character code
|
||||||
|
r"\s*;\s*"
|
||||||
|
r"([^#]+)") # everything up to the potential comment
|
||||||
|
for line in infile:
|
||||||
|
match = line_regex.match(line)
|
||||||
|
if not match:
|
||||||
|
continue
|
||||||
|
|
||||||
|
first, last, data = match.groups()
|
||||||
|
if last is None:
|
||||||
|
last = first
|
||||||
|
|
||||||
|
first = int(first, 16)
|
||||||
|
last = int(last, 16)
|
||||||
|
data = tostr(data.rstrip(), encoding="ascii")
|
||||||
|
|
||||||
|
ranges.append((first, last, data))
|
||||||
|
|
||||||
|
ranges.sort()
|
||||||
|
|
||||||
|
if isinstance(default, unicode):
|
||||||
|
default = tostr(default, encoding="ascii")
|
||||||
|
|
||||||
|
# fill the gaps between explicitly defined ranges
|
||||||
|
last_start, last_end = -1, -1
|
||||||
|
full_ranges = []
|
||||||
|
for start, end, value in ranges:
|
||||||
|
assert last_end < start
|
||||||
|
assert start <= end
|
||||||
|
if start - last_end > 1:
|
||||||
|
full_ranges.append((last_end+1, start-1, default))
|
||||||
|
if is_set:
|
||||||
|
value = set(value.split())
|
||||||
|
full_ranges.append((start, end, value))
|
||||||
|
last_start, last_end = start, end
|
||||||
|
if last_end != MAX_UNICODE:
|
||||||
|
full_ranges.append((last_end+1, MAX_UNICODE, default))
|
||||||
|
|
||||||
|
# reduce total number of ranges by combining continuous ones
|
||||||
|
last_start, last_end, last_value = full_ranges.pop(0)
|
||||||
|
merged_ranges = []
|
||||||
|
for start, end, value in full_ranges:
|
||||||
|
if value == last_value:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
merged_ranges.append((last_start, start-1, last_value))
|
||||||
|
last_start, line_end, last_value = start, end, value
|
||||||
|
merged_ranges.append((last_start, MAX_UNICODE, last_value))
|
||||||
|
|
||||||
|
# make sure that the ranges cover the full unicode repertoire
|
||||||
|
assert merged_ranges[0][0] == 0
|
||||||
|
for (cs, ce, cv), (ns, ne, nv) in zip(merged_ranges, merged_ranges[1:]):
|
||||||
|
assert ce+1 == ns
|
||||||
|
assert merged_ranges[-1][1] == MAX_UNICODE
|
||||||
|
|
||||||
|
return merged_ranges
|
||||||
|
|
||||||
|
|
||||||
|
def _set_repr(value):
|
||||||
|
return 'None' if value is None else "{{{}}}".format(
|
||||||
|
", ".join(repr(v) for v in sorted(value)))
|
||||||
|
|
||||||
|
|
||||||
|
def build_ranges(filename, local_ucd=None, output_path=None,
|
||||||
|
default=None, is_set=False):
|
||||||
|
"""Fetch 'filename' UCD data file from Unicode official website, parse
|
||||||
|
the ranges and properties and write them as two Python lists
|
||||||
|
to 'fontTools.unicodedata.<filename>.py'.
|
||||||
|
|
||||||
|
To load the data file from a local directory, you can use the
|
||||||
|
'local_ucd' argument.
|
||||||
|
"""
|
||||||
|
modname = os.path.splitext(filename)[0] + ".py"
|
||||||
|
if not output_path:
|
||||||
|
output_path = UNIDATA_PATH + modname
|
||||||
|
|
||||||
|
if local_ucd:
|
||||||
|
log.info("loading '%s' from local directory '%s'", filename, local_ucd)
|
||||||
|
cm = open(pjoin(local_ucd, filename), "r", encoding="utf-8")
|
||||||
|
else:
|
||||||
|
log.info("downloading '%s' from '%s'", filename, UNIDATA_URL)
|
||||||
|
cm = open_unidata_file(filename)
|
||||||
|
|
||||||
|
with cm as f:
|
||||||
|
header = parse_unidata_header(f)
|
||||||
|
ranges = parse_range_properties(f, default=default, is_set=is_set)
|
||||||
|
|
||||||
|
max_value_length = min(56, max(len(repr(v)) for _, _, v in ranges))
|
||||||
|
|
||||||
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(SRC_ENCODING)
|
||||||
|
f.write("#\n")
|
||||||
|
f.write(NOTICE)
|
||||||
|
f.write("# Source: {}{}\n".format(UNIDATA_URL, filename))
|
||||||
|
f.write("# License: {}\n".format(UNIDATA_LICENSE_URL))
|
||||||
|
f.write("#\n")
|
||||||
|
f.write(header+"\n\n")
|
||||||
|
|
||||||
|
f.write("RANGES = [\n")
|
||||||
|
for first, last, value in ranges:
|
||||||
|
f.write(" 0x{:0>4X}, # .. 0x{:0>4X} ; {}\n".format(
|
||||||
|
first, last, _set_repr(value) if is_set else value))
|
||||||
|
f.write("]\n")
|
||||||
|
|
||||||
|
f.write("\n")
|
||||||
|
f.write("VALUES = [\n")
|
||||||
|
for first, last, value in ranges:
|
||||||
|
if is_set:
|
||||||
|
value_repr = "{},".format(_set_repr(value))
|
||||||
|
else:
|
||||||
|
value_repr = "{!r},".format(value)
|
||||||
|
f.write(" {} # {:0>4X}..{:0>4X}\n".format(
|
||||||
|
value_repr.ljust(max_value_length+1), first, last))
|
||||||
|
f.write("]\n")
|
||||||
|
|
||||||
|
log.info("saved new file: '%s'", os.path.normpath(output_path))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Generate fontTools.unicodedata from UCD data files")
|
||||||
|
parser.add_argument(
|
||||||
|
'--ucd-path', help="Path to local folder containing UCD data files")
|
||||||
|
parser.add_argument('-q', '--quiet', action="store_true")
|
||||||
|
options = parser.parse_args()
|
||||||
|
|
||||||
|
level = "WARNING" if options.quiet else "INFO"
|
||||||
|
logging.basicConfig(level=level, format="%(message)s")
|
||||||
|
|
||||||
|
build_ranges("Blocks.txt", local_ucd=options.ucd_path, default="No_Block")
|
||||||
|
build_ranges("Scripts.txt", local_ucd=options.ucd_path, default="Unknown")
|
||||||
|
build_ranges("ScriptExtensions.txt", local_ucd=options.ucd_path,
|
||||||
|
is_set=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import sys
|
||||||
|
sys.exit(main())
|
174
Tests/unicodedata_test.py
Normal file
174
Tests/unicodedata_test.py
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
from __future__ import (
|
||||||
|
print_function, division, absolute_import, unicode_literals)
|
||||||
|
from fontTools.misc.py23 import *
|
||||||
|
|
||||||
|
from fontTools import unicodedata
|
||||||
|
|
||||||
|
|
||||||
|
def test_script():
    """Spot-check unicodedata.script() with one sample character per
    script, plus the basic Common/Unknown cases."""
    assert unicodedata.script("a") == "Latin"
    assert unicodedata.script(unichr(0)) == "Common"
    # unassigned code points map to "Unknown"
    assert unicodedata.script(unichr(0x0378)) == "Unknown"
    assert unicodedata.script(unichr(0x10FFFF)) == "Unknown"

    # these were randomly sampled, one character per script
    samples = [
        (0x1E918, 'Adlam'),
        (0x1170D, 'Ahom'),
        (0x145A0, 'Anatolian_Hieroglyphs'),
        (0x0607, 'Arabic'),
        (0x056C, 'Armenian'),
        (0x10B27, 'Avestan'),
        (0x1B41, 'Balinese'),
        (0x168AD, 'Bamum'),
        (0x16ADD, 'Bassa_Vah'),
        (0x1BE5, 'Batak'),
        (0x09F3, 'Bengali'),
        (0x11C5B, 'Bhaiksuki'),
        (0x3126, 'Bopomofo'),
        (0x1103B, 'Brahmi'),
        (0x2849, 'Braille'),
        (0x1A0A, 'Buginese'),
        (0x174E, 'Buhid'),
        (0x18EE, 'Canadian_Aboriginal'),
        (0x102B7, 'Carian'),
        (0x1053D, 'Caucasian_Albanian'),
        (0x11123, 'Chakma'),
        (0xAA1F, 'Cham'),
        (0xAB95, 'Cherokee'),
        (0x1F0C7, 'Common'),
        (0x2C85, 'Coptic'),
        (0x12014, 'Cuneiform'),
        (0x1082E, 'Cypriot'),
        (0xA686, 'Cyrillic'),
        (0x10417, 'Deseret'),
        (0x093E, 'Devanagari'),
        (0x1BC4B, 'Duployan'),
        (0x1310C, 'Egyptian_Hieroglyphs'),
        (0x1051C, 'Elbasan'),
        (0x2DA6, 'Ethiopic'),
        (0x10AD, 'Georgian'),
        (0x2C52, 'Glagolitic'),
        (0x10343, 'Gothic'),
        (0x11371, 'Grantha'),
        (0x03D0, 'Greek'),
        (0x0AAA, 'Gujarati'),
        (0x0A4C, 'Gurmukhi'),
        (0x23C9F, 'Han'),
        (0xC259, 'Hangul'),
        (0x1722, 'Hanunoo'),
        (0x108F5, 'Hatran'),
        (0x05C2, 'Hebrew'),
        (0x1B072, 'Hiragana'),
        (0x10847, 'Imperial_Aramaic'),
        (0x033A, 'Inherited'),
        (0x10B66, 'Inscriptional_Pahlavi'),
        (0x10B4B, 'Inscriptional_Parthian'),
        (0xA98A, 'Javanese'),
        (0x110B2, 'Kaithi'),
        (0x0CC6, 'Kannada'),
        (0x3337, 'Katakana'),
        (0xA915, 'Kayah_Li'),
        (0x10A2E, 'Kharoshthi'),
        (0x17AA, 'Khmer'),
        (0x11225, 'Khojki'),
        (0x112B6, 'Khudawadi'),
        (0x0ED7, 'Lao'),
        (0xAB3C, 'Latin'),
        (0x1C48, 'Lepcha'),
        (0x1923, 'Limbu'),
        (0x1071D, 'Linear_A'),
        (0x100EC, 'Linear_B'),
        (0xA4E9, 'Lisu'),
        (0x10284, 'Lycian'),
        (0x10926, 'Lydian'),
        (0x11161, 'Mahajani'),
        (0x0D56, 'Malayalam'),
        (0x0856, 'Mandaic'),
        (0x10AF0, 'Manichaean'),
        (0x11CB0, 'Marchen'),
        (0x11D28, 'Masaram_Gondi'),
        (0xABDD, 'Meetei_Mayek'),
        (0x1E897, 'Mende_Kikakui'),
        (0x109B0, 'Meroitic_Cursive'),
        (0x10993, 'Meroitic_Hieroglyphs'),
        (0x16F5D, 'Miao'),
        (0x1160B, 'Modi'),
        (0x18A8, 'Mongolian'),
        (0x16A48, 'Mro'),
        (0x1128C, 'Multani'),
        (0x105B, 'Myanmar'),
        (0x108AF, 'Nabataean'),
        (0x19B3, 'New_Tai_Lue'),
        (0x1143D, 'Newa'),
        (0x07F4, 'Nko'),
        (0x1B192, 'Nushu'),
        (0x169C, 'Ogham'),
        (0x1C56, 'Ol_Chiki'),
        (0x10CE9, 'Old_Hungarian'),
        (0x10316, 'Old_Italic'),
        (0x10A93, 'Old_North_Arabian'),
        (0x1035A, 'Old_Permic'),
        (0x103D5, 'Old_Persian'),
        (0x10A65, 'Old_South_Arabian'),
        (0x10C09, 'Old_Turkic'),
        (0x0B60, 'Oriya'),
        (0x104CF, 'Osage'),
        (0x104A8, 'Osmanya'),
        (0x16B12, 'Pahawh_Hmong'),
        (0x10879, 'Palmyrene'),
        (0x11AF1, 'Pau_Cin_Hau'),
        (0xA869, 'Phags_Pa'),
        (0x10909, 'Phoenician'),
        (0x10B81, 'Psalter_Pahlavi'),
        (0xA941, 'Rejang'),
        (0x16C3, 'Runic'),
        (0x0814, 'Samaritan'),
        (0xA88C, 'Saurashtra'),
        (0x111C8, 'Sharada'),
        (0x1045F, 'Shavian'),
        (0x115AD, 'Siddham'),
        (0x1D8C0, 'SignWriting'),
        (0x0DB9, 'Sinhala'),
        (0x110F9, 'Sora_Sompeng'),
        (0x11A60, 'Soyombo'),
        (0x1B94, 'Sundanese'),
        (0xA81F, 'Syloti_Nagri'),
        (0x0740, 'Syriac'),
        (0x1714, 'Tagalog'),
        (0x1761, 'Tagbanwa'),
        (0x1965, 'Tai_Le'),
        (0x1A32, 'Tai_Tham'),
        (0xAA86, 'Tai_Viet'),
        (0x116A5, 'Takri'),
        (0x0B8E, 'Tamil'),
        (0x1754D, 'Tangut'),
        (0x0C40, 'Telugu'),
        (0x07A4, 'Thaana'),
        (0x0E42, 'Thai'),
        (0x0F09, 'Tibetan'),
        (0x2D3A, 'Tifinagh'),
        (0x114B0, 'Tirhuta'),
        (0x1038B, 'Ugaritic'),
        (0xA585, 'Vai'),
        (0x118CF, 'Warang_Citi'),
        (0xA066, 'Yi'),
        (0x11A31, 'Zanabazar_Square'),
    ]
    for codepoint, expected_script in samples:
        assert unicodedata.script(unichr(codepoint)) == expected_script
|
||||||
|
|
||||||
|
|
||||||
|
def test_script_extension():
    """Spot-check unicodedata.script_extension(), which returns the set
    of scripts a character is used with."""
    assert unicodedata.script_extension("a") == {"Latin"}
    assert unicodedata.script_extension(unichr(0)) == {"Common"}
    # unassigned code points yield a singleton "Unknown" set
    assert unicodedata.script_extension(unichr(0x0378)) == {"Unknown"}
    assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Unknown"}

    # characters shared by multiple scripts report short script codes
    assert unicodedata.script_extension("\u0660") == {'Arab', 'Thaa'}
    expected = {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Mahj',
                'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml',
                'Telu', 'Tirh'}
    assert unicodedata.script_extension("\u0964") == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_block():
    """Spot-check unicodedata.block() at block boundaries."""
    cases = [
        ("\x00", "Basic Latin"),         # first code point of first block
        ("\x7F", "Basic Latin"),         # last code point of first block
        ("\x80", "Latin-1 Supplement"),  # first code point of next block
        ("\u1c90", "No_Block"),          # not in any block in this UCD version
    ]
    for char, expected_block in cases:
        assert unicodedata.block(char) == expected_block
|
Loading…
x
Reference in New Issue
Block a user