From a4d791a9952436630dbcc593fdf03ff4100b2b32 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Fri, 24 Nov 2023 15:18:37 -0500 Subject: [PATCH] Add tokenizer --- .gitignore | 8 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + Package.swift | 23 + Sources/HTMLStreamer/HTMLEntities.swift | 560 ++++++ Sources/HTMLStreamer/HTMLParser.swift | 12 + Sources/HTMLStreamer/InlineArray3.swift | 220 +++ Sources/HTMLStreamer/Tokenizer.swift | 1692 +++++++++++++++++ .../HTMLStreamerTests/InlineArray3Tests.swift | 38 + Tests/HTMLStreamerTests/TokenizerTests.swift | 79 + 9 files changed, 2640 insertions(+) create mode 100644 .gitignore create mode 100644 .swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 Package.swift create mode 100644 Sources/HTMLStreamer/HTMLEntities.swift create mode 100644 Sources/HTMLStreamer/HTMLParser.swift create mode 100644 Sources/HTMLStreamer/InlineArray3.swift create mode 100644 Sources/HTMLStreamer/Tokenizer.swift create mode 100644 Tests/HTMLStreamerTests/InlineArray3Tests.swift create mode 100644 Tests/HTMLStreamerTests/TokenizerTests.swift diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0023a53 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +/.build +/Packages +xcuserdata/ +DerivedData/ +.swiftpm/configuration/registries.json +.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata +.netrc diff --git a/.swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/.swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 0000000..18d9810 --- /dev/null +++ b/.swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/Package.swift b/Package.swift new file mode 100644 index 0000000..f2a208b --- /dev/null +++ b/Package.swift @@ -0,0 +1,23 @@ +// swift-tools-version: 5.9 +// The swift-tools-version declares the minimum version of 
Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "HTMLStreamer", + products: [ + // Products define the executables and libraries a package produces, making them visible to other packages. + .library( + name: "HTMLStreamer", + targets: ["HTMLStreamer"]), + ], + targets: [ + // Targets are the basic building blocks of a package, defining a module or a test suite. + // Targets can depend on other targets in this package and products from dependencies. + .target( + name: "HTMLStreamer"), + .testTarget( + name: "HTMLStreamerTests", + dependencies: ["HTMLStreamer"]), + ] +) diff --git a/Sources/HTMLStreamer/HTMLEntities.swift b/Sources/HTMLStreamer/HTMLEntities.swift new file mode 100644 index 0000000..e59f445 --- /dev/null +++ b/Sources/HTMLStreamer/HTMLEntities.swift @@ -0,0 +1,560 @@ +// +// HTMLEntities.swift +// HTMLStreamer +// +// Created by Shadowfacts on 11/22/23. +// + +import Foundation + +// From https://github.com/Kitura/swift-html-entities +// Under the Apache-2.0 license + +// Split map into two halves; otherwise, segmentation fault when compiling +let namedCharactersDecodeMap = namedCharactersDecodeMap1.merging(namedCharactersDecodeMap2, uniquingKeysWith: { $1 }) + +private let namedCharactersDecodeMap1: [String: Character] = [ + "Aacute;":"\u{C1}","aacute;":"\u{E1}","Abreve;":"\u{102}","abreve;":"\u{103}", + "ac;":"\u{223E}","acd;":"\u{223F}","acE;":"\u{223E}\u{333}","Acirc;":"\u{C2}", + "acirc;":"\u{E2}","acute;":"\u{B4}","Acy;":"\u{410}","acy;":"\u{430}", + "AElig;":"\u{C6}","aelig;":"\u{E6}","af;":"\u{2061}","Afr;":"\u{1D504}", + "afr;":"\u{1D51E}","Agrave;":"\u{C0}","agrave;":"\u{E0}","alefsym;":"\u{2135}", + "aleph;":"\u{2135}","Alpha;":"\u{391}","alpha;":"\u{3B1}","Amacr;":"\u{100}", + "amacr;":"\u{101}","amalg;":"\u{2A3F}","AMP;":"\u{26}","amp;":"\u{26}", + "And;":"\u{2A53}","and;":"\u{2227}","andand;":"\u{2A55}","andd;":"\u{2A5C}", + 
"andslope;":"\u{2A58}","andv;":"\u{2A5A}","ang;":"\u{2220}","ange;":"\u{29A4}", + "angle;":"\u{2220}","angmsd;":"\u{2221}","angmsdaa;":"\u{29A8}","angmsdab;":"\u{29A9}", + "angmsdac;":"\u{29AA}","angmsdad;":"\u{29AB}","angmsdae;":"\u{29AC}","angmsdaf;":"\u{29AD}", + "angmsdag;":"\u{29AE}","angmsdah;":"\u{29AF}","angrt;":"\u{221F}","angrtvb;":"\u{22BE}", + "angrtvbd;":"\u{299D}","angsph;":"\u{2222}","angst;":"\u{C5}","angzarr;":"\u{237C}", + "Aogon;":"\u{104}","aogon;":"\u{105}","Aopf;":"\u{1D538}","aopf;":"\u{1D552}", + "ap;":"\u{2248}","apacir;":"\u{2A6F}","apE;":"\u{2A70}","ape;":"\u{224A}", + "apid;":"\u{224B}","apos;":"\u{27}","ApplyFunction;":"\u{2061}","approx;":"\u{2248}", + "approxeq;":"\u{224A}","Aring;":"\u{C5}","aring;":"\u{E5}","Ascr;":"\u{1D49C}", + "ascr;":"\u{1D4B6}","Assign;":"\u{2254}","ast;":"\u{2A}","asymp;":"\u{2248}", + "asympeq;":"\u{224D}","Atilde;":"\u{C3}","atilde;":"\u{E3}","Auml;":"\u{C4}", + "auml;":"\u{E4}","awconint;":"\u{2233}","awint;":"\u{2A11}","backcong;":"\u{224C}", + "backepsilon;":"\u{3F6}","backprime;":"\u{2035}","backsim;":"\u{223D}","backsimeq;":"\u{22CD}", + "Backslash;":"\u{2216}","Barv;":"\u{2AE7}","barvee;":"\u{22BD}","Barwed;":"\u{2306}", + "barwed;":"\u{2305}","barwedge;":"\u{2305}","bbrk;":"\u{23B5}","bbrktbrk;":"\u{23B6}", + "bcong;":"\u{224C}","Bcy;":"\u{411}","bcy;":"\u{431}","bdquo;":"\u{201E}", + "becaus;":"\u{2235}","Because;":"\u{2235}","because;":"\u{2235}","bemptyv;":"\u{29B0}", + "bepsi;":"\u{3F6}","bernou;":"\u{212C}","Bernoullis;":"\u{212C}","Beta;":"\u{392}", + "beta;":"\u{3B2}","beth;":"\u{2136}","between;":"\u{226C}","Bfr;":"\u{1D505}", + "bfr;":"\u{1D51F}","bigcap;":"\u{22C2}","bigcirc;":"\u{25EF}","bigcup;":"\u{22C3}", + "bigodot;":"\u{2A00}","bigoplus;":"\u{2A01}","bigotimes;":"\u{2A02}","bigsqcup;":"\u{2A06}", + "bigstar;":"\u{2605}","bigtriangledown;":"\u{25BD}","bigtriangleup;":"\u{25B3}","biguplus;":"\u{2A04}", + 
"bigvee;":"\u{22C1}","bigwedge;":"\u{22C0}","bkarow;":"\u{290D}","blacklozenge;":"\u{29EB}", + "blacksquare;":"\u{25AA}","blacktriangle;":"\u{25B4}","blacktriangledown;":"\u{25BE}","blacktriangleleft;":"\u{25C2}", + "blacktriangleright;":"\u{25B8}","blank;":"\u{2423}","blk12;":"\u{2592}","blk14;":"\u{2591}", + "blk34;":"\u{2593}","block;":"\u{2588}","bne;":"\u{3D}\u{20E5}","bnequiv;":"\u{2261}\u{20E5}", + "bNot;":"\u{2AED}","bnot;":"\u{2310}","Bopf;":"\u{1D539}","bopf;":"\u{1D553}", + "bot;":"\u{22A5}","bottom;":"\u{22A5}","bowtie;":"\u{22C8}","boxbox;":"\u{29C9}", + "boxDL;":"\u{2557}","boxDl;":"\u{2556}","boxdL;":"\u{2555}","boxdl;":"\u{2510}", + "boxDR;":"\u{2554}","boxDr;":"\u{2553}","boxdR;":"\u{2552}","boxdr;":"\u{250C}", + "boxH;":"\u{2550}","boxh;":"\u{2500}","boxHD;":"\u{2566}","boxHd;":"\u{2564}", + "boxhD;":"\u{2565}","boxhd;":"\u{252C}","boxHU;":"\u{2569}","boxHu;":"\u{2567}", + "boxhU;":"\u{2568}","boxhu;":"\u{2534}","boxminus;":"\u{229F}","boxplus;":"\u{229E}", + "boxtimes;":"\u{22A0}","boxUL;":"\u{255D}","boxUl;":"\u{255C}","boxuL;":"\u{255B}", + "boxul;":"\u{2518}","boxUR;":"\u{255A}","boxUr;":"\u{2559}","boxuR;":"\u{2558}", + "boxur;":"\u{2514}","boxV;":"\u{2551}","boxv;":"\u{2502}","boxVH;":"\u{256C}", + "boxVh;":"\u{256B}","boxvH;":"\u{256A}","boxvh;":"\u{253C}","boxVL;":"\u{2563}", + "boxVl;":"\u{2562}","boxvL;":"\u{2561}","boxvl;":"\u{2524}","boxVR;":"\u{2560}", + "boxVr;":"\u{255F}","boxvR;":"\u{255E}","boxvr;":"\u{251C}","bprime;":"\u{2035}", + "Breve;":"\u{2D8}","breve;":"\u{2D8}","brvbar;":"\u{A6}","Bscr;":"\u{212C}", + "bscr;":"\u{1D4B7}","bsemi;":"\u{204F}","bsim;":"\u{223D}","bsime;":"\u{22CD}", + "bsol;":"\u{5C}","bsolb;":"\u{29C5}","bsolhsub;":"\u{27C8}","bull;":"\u{2022}", + "bullet;":"\u{2022}","bump;":"\u{224E}","bumpE;":"\u{2AAE}","bumpe;":"\u{224F}", + "Bumpeq;":"\u{224E}","bumpeq;":"\u{224F}","Cacute;":"\u{106}","cacute;":"\u{107}", + "Cap;":"\u{22D2}","cap;":"\u{2229}","capand;":"\u{2A44}","capbrcup;":"\u{2A49}", + 
"capcap;":"\u{2A4B}","capcup;":"\u{2A47}","capdot;":"\u{2A40}","CapitalDifferentialD;":"\u{2145}", + "caps;":"\u{2229}\u{FE00}","caret;":"\u{2041}","caron;":"\u{2C7}","Cayleys;":"\u{212D}", + "ccaps;":"\u{2A4D}","Ccaron;":"\u{10C}","ccaron;":"\u{10D}","Ccedil;":"\u{C7}", + "ccedil;":"\u{E7}","Ccirc;":"\u{108}","ccirc;":"\u{109}","Cconint;":"\u{2230}", + "ccups;":"\u{2A4C}","ccupssm;":"\u{2A50}","Cdot;":"\u{10A}","cdot;":"\u{10B}", + "cedil;":"\u{B8}","Cedilla;":"\u{B8}","cemptyv;":"\u{29B2}","cent;":"\u{A2}", + "CenterDot;":"\u{B7}","centerdot;":"\u{B7}","Cfr;":"\u{212D}","cfr;":"\u{1D520}", + "CHcy;":"\u{427}","chcy;":"\u{447}","check;":"\u{2713}","checkmark;":"\u{2713}", + "Chi;":"\u{3A7}","chi;":"\u{3C7}","cir;":"\u{25CB}","circ;":"\u{2C6}", + "circeq;":"\u{2257}","circlearrowleft;":"\u{21BA}","circlearrowright;":"\u{21BB}","circledast;":"\u{229B}", + "circledcirc;":"\u{229A}","circleddash;":"\u{229D}","CircleDot;":"\u{2299}","circledR;":"\u{AE}", + "circledS;":"\u{24C8}","CircleMinus;":"\u{2296}","CirclePlus;":"\u{2295}","CircleTimes;":"\u{2297}", + "cirE;":"\u{29C3}","cire;":"\u{2257}","cirfnint;":"\u{2A10}","cirmid;":"\u{2AEF}", + "cirscir;":"\u{29C2}","ClockwiseContourIntegral;":"\u{2232}","CloseCurlyDoubleQuote;":"\u{201D}","CloseCurlyQuote;":"\u{2019}", + "clubs;":"\u{2663}","clubsuit;":"\u{2663}","Colon;":"\u{2237}","colon;":"\u{3A}", + "Colone;":"\u{2A74}","colone;":"\u{2254}","coloneq;":"\u{2254}","comma;":"\u{2C}", + "commat;":"\u{40}","comp;":"\u{2201}","compfn;":"\u{2218}","complement;":"\u{2201}", + "complexes;":"\u{2102}","cong;":"\u{2245}","congdot;":"\u{2A6D}","Congruent;":"\u{2261}", + "Conint;":"\u{222F}","conint;":"\u{222E}","ContourIntegral;":"\u{222E}","Copf;":"\u{2102}", + "copf;":"\u{1D554}","coprod;":"\u{2210}","Coproduct;":"\u{2210}","COPY;":"\u{A9}", + "copy;":"\u{A9}","copysr;":"\u{2117}","CounterClockwiseContourIntegral;":"\u{2233}","crarr;":"\u{21B5}", + "Cross;":"\u{2A2F}","cross;":"\u{2717}","Cscr;":"\u{1D49E}","cscr;":"\u{1D4B8}", 
+ "csub;":"\u{2ACF}","csube;":"\u{2AD1}","csup;":"\u{2AD0}","csupe;":"\u{2AD2}", + "ctdot;":"\u{22EF}","cudarrl;":"\u{2938}","cudarrr;":"\u{2935}","cuepr;":"\u{22DE}", + "cuesc;":"\u{22DF}","cularr;":"\u{21B6}","cularrp;":"\u{293D}","Cup;":"\u{22D3}", + "cup;":"\u{222A}","cupbrcap;":"\u{2A48}","CupCap;":"\u{224D}","cupcap;":"\u{2A46}", + "cupcup;":"\u{2A4A}","cupdot;":"\u{228D}","cupor;":"\u{2A45}","cups;":"\u{222A}\u{FE00}", + "curarr;":"\u{21B7}","curarrm;":"\u{293C}","curlyeqprec;":"\u{22DE}","curlyeqsucc;":"\u{22DF}", + "curlyvee;":"\u{22CE}","curlywedge;":"\u{22CF}","curren;":"\u{A4}","curvearrowleft;":"\u{21B6}", + "curvearrowright;":"\u{21B7}","cuvee;":"\u{22CE}","cuwed;":"\u{22CF}","cwconint;":"\u{2232}", + "cwint;":"\u{2231}","cylcty;":"\u{232D}","Dagger;":"\u{2021}","dagger;":"\u{2020}", + "daleth;":"\u{2138}","Darr;":"\u{21A1}","dArr;":"\u{21D3}","darr;":"\u{2193}", + "dash;":"\u{2010}","Dashv;":"\u{2AE4}","dashv;":"\u{22A3}","dbkarow;":"\u{290F}", + "dblac;":"\u{2DD}","Dcaron;":"\u{10E}","dcaron;":"\u{10F}","Dcy;":"\u{414}", + "dcy;":"\u{434}","DD;":"\u{2145}","dd;":"\u{2146}","ddagger;":"\u{2021}", + "ddarr;":"\u{21CA}","DDotrahd;":"\u{2911}","ddotseq;":"\u{2A77}","deg;":"\u{B0}", + "Del;":"\u{2207}","Delta;":"\u{394}","delta;":"\u{3B4}","demptyv;":"\u{29B1}", + "dfisht;":"\u{297F}","Dfr;":"\u{1D507}","dfr;":"\u{1D521}","dHar;":"\u{2965}", + "dharl;":"\u{21C3}","dharr;":"\u{21C2}","DiacriticalAcute;":"\u{B4}","DiacriticalDot;":"\u{2D9}", + "DiacriticalDoubleAcute;":"\u{2DD}","DiacriticalGrave;":"\u{60}","DiacriticalTilde;":"\u{2DC}","diam;":"\u{22C4}", + "Diamond;":"\u{22C4}","diamond;":"\u{22C4}","diamondsuit;":"\u{2666}","diams;":"\u{2666}", + "die;":"\u{A8}","DifferentialD;":"\u{2146}","digamma;":"\u{3DD}","disin;":"\u{22F2}", + "div;":"\u{F7}","divide;":"\u{F7}","divideontimes;":"\u{22C7}","divonx;":"\u{22C7}", + "DJcy;":"\u{402}","djcy;":"\u{452}","dlcorn;":"\u{231E}","dlcrop;":"\u{230D}", + 
"dollar;":"\u{24}","Dopf;":"\u{1D53B}","dopf;":"\u{1D555}","Dot;":"\u{A8}", + "dot;":"\u{2D9}","DotDot;":"\u{20DC}","doteq;":"\u{2250}","doteqdot;":"\u{2251}", + "DotEqual;":"\u{2250}","dotminus;":"\u{2238}","dotplus;":"\u{2214}","dotsquare;":"\u{22A1}", + "doublebarwedge;":"\u{2306}","DoubleContourIntegral;":"\u{222F}","DoubleDot;":"\u{A8}","DoubleDownArrow;":"\u{21D3}", + "DoubleLeftArrow;":"\u{21D0}","DoubleLeftRightArrow;":"\u{21D4}","DoubleLeftTee;":"\u{2AE4}","DoubleLongLeftArrow;":"\u{27F8}", + "DoubleLongLeftRightArrow;":"\u{27FA}","DoubleLongRightArrow;":"\u{27F9}","DoubleRightArrow;":"\u{21D2}","DoubleRightTee;":"\u{22A8}", + "DoubleUpArrow;":"\u{21D1}","DoubleUpDownArrow;":"\u{21D5}","DoubleVerticalBar;":"\u{2225}","DownArrow;":"\u{2193}", + "Downarrow;":"\u{21D3}","downarrow;":"\u{2193}","DownArrowBar;":"\u{2913}","DownArrowUpArrow;":"\u{21F5}", + "DownBreve;":"\u{311}","downdownarrows;":"\u{21CA}","downharpoonleft;":"\u{21C3}","downharpoonright;":"\u{21C2}", + "DownLeftRightVector;":"\u{2950}","DownLeftTeeVector;":"\u{295E}","DownLeftVector;":"\u{21BD}","DownLeftVectorBar;":"\u{2956}", + "DownRightTeeVector;":"\u{295F}","DownRightVector;":"\u{21C1}","DownRightVectorBar;":"\u{2957}","DownTee;":"\u{22A4}", + "DownTeeArrow;":"\u{21A7}","drbkarow;":"\u{2910}","drcorn;":"\u{231F}","drcrop;":"\u{230C}", + "Dscr;":"\u{1D49F}","dscr;":"\u{1D4B9}","DScy;":"\u{405}","dscy;":"\u{455}", + "dsol;":"\u{29F6}","Dstrok;":"\u{110}","dstrok;":"\u{111}","dtdot;":"\u{22F1}", + "dtri;":"\u{25BF}","dtrif;":"\u{25BE}","duarr;":"\u{21F5}","duhar;":"\u{296F}", + "dwangle;":"\u{29A6}","DZcy;":"\u{40F}","dzcy;":"\u{45F}","dzigrarr;":"\u{27FF}", + "Eacute;":"\u{C9}","eacute;":"\u{E9}","easter;":"\u{2A6E}","Ecaron;":"\u{11A}", + "ecaron;":"\u{11B}","ecir;":"\u{2256}","Ecirc;":"\u{CA}","ecirc;":"\u{EA}", + "ecolon;":"\u{2255}","Ecy;":"\u{42D}","ecy;":"\u{44D}","eDDot;":"\u{2A77}", + "Edot;":"\u{116}","eDot;":"\u{2251}","edot;":"\u{117}","ee;":"\u{2147}", + 
"efDot;":"\u{2252}","Efr;":"\u{1D508}","efr;":"\u{1D522}","eg;":"\u{2A9A}", + "Egrave;":"\u{C8}","egrave;":"\u{E8}","egs;":"\u{2A96}","egsdot;":"\u{2A98}", + "el;":"\u{2A99}","Element;":"\u{2208}","elinters;":"\u{23E7}","ell;":"\u{2113}", + "els;":"\u{2A95}","elsdot;":"\u{2A97}","Emacr;":"\u{112}","emacr;":"\u{113}", + "empty;":"\u{2205}","emptyset;":"\u{2205}","EmptySmallSquare;":"\u{25FB}","emptyv;":"\u{2205}", + "EmptyVerySmallSquare;":"\u{25AB}","emsp;":"\u{2003}","emsp13;":"\u{2004}","emsp14;":"\u{2005}", + "ENG;":"\u{14A}","eng;":"\u{14B}","ensp;":"\u{2002}","Eogon;":"\u{118}", + "eogon;":"\u{119}","Eopf;":"\u{1D53C}","eopf;":"\u{1D556}","epar;":"\u{22D5}", + "eparsl;":"\u{29E3}","eplus;":"\u{2A71}","epsi;":"\u{3B5}","Epsilon;":"\u{395}", + "epsilon;":"\u{3B5}","epsiv;":"\u{3F5}","eqcirc;":"\u{2256}","eqcolon;":"\u{2255}", + "eqsim;":"\u{2242}","eqslantgtr;":"\u{2A96}","eqslantless;":"\u{2A95}","Equal;":"\u{2A75}", + "equals;":"\u{3D}","EqualTilde;":"\u{2242}","equest;":"\u{225F}","Equilibrium;":"\u{21CC}", + "equiv;":"\u{2261}","equivDD;":"\u{2A78}","eqvparsl;":"\u{29E5}","erarr;":"\u{2971}", + "erDot;":"\u{2253}","Escr;":"\u{2130}","escr;":"\u{212F}","esdot;":"\u{2250}", + "Esim;":"\u{2A73}","esim;":"\u{2242}","Eta;":"\u{397}","eta;":"\u{3B7}", + "ETH;":"\u{D0}","eth;":"\u{F0}","Euml;":"\u{CB}","euml;":"\u{EB}", + "euro;":"\u{20AC}","excl;":"\u{21}","exist;":"\u{2203}","Exists;":"\u{2203}", + "expectation;":"\u{2130}","ExponentialE;":"\u{2147}","exponentiale;":"\u{2147}","fallingdotseq;":"\u{2252}", + "Fcy;":"\u{424}","fcy;":"\u{444}","female;":"\u{2640}","ffilig;":"\u{FB03}", + "fflig;":"\u{FB00}","ffllig;":"\u{FB04}","Ffr;":"\u{1D509}","ffr;":"\u{1D523}", + "filig;":"\u{FB01}","FilledSmallSquare;":"\u{25FC}","FilledVerySmallSquare;":"\u{25AA}", + + // Skip "fjlig;" due to Swift not recognizing it as a single grapheme cluster + // "fjlig;":"\u{66}\u{6A}", + + "flat;":"\u{266D}","fllig;":"\u{FB02}","fltns;":"\u{25B1}","fnof;":"\u{192}", + 
"Fopf;":"\u{1D53D}","fopf;":"\u{1D557}","ForAll;":"\u{2200}","forall;":"\u{2200}", + "fork;":"\u{22D4}","forkv;":"\u{2AD9}","Fouriertrf;":"\u{2131}","fpartint;":"\u{2A0D}", + "frac12;":"\u{BD}","frac13;":"\u{2153}","frac14;":"\u{BC}","frac15;":"\u{2155}", + "frac16;":"\u{2159}","frac18;":"\u{215B}","frac23;":"\u{2154}","frac25;":"\u{2156}", + "frac34;":"\u{BE}","frac35;":"\u{2157}","frac38;":"\u{215C}","frac45;":"\u{2158}", + "frac56;":"\u{215A}","frac58;":"\u{215D}","frac78;":"\u{215E}","frasl;":"\u{2044}", + "frown;":"\u{2322}","Fscr;":"\u{2131}","fscr;":"\u{1D4BB}","gacute;":"\u{1F5}", + "Gamma;":"\u{393}","gamma;":"\u{3B3}","Gammad;":"\u{3DC}","gammad;":"\u{3DD}", + "gap;":"\u{2A86}","Gbreve;":"\u{11E}","gbreve;":"\u{11F}","Gcedil;":"\u{122}", + "Gcirc;":"\u{11C}","gcirc;":"\u{11D}","Gcy;":"\u{413}","gcy;":"\u{433}", + "Gdot;":"\u{120}","gdot;":"\u{121}","gE;":"\u{2267}","ge;":"\u{2265}", + "gEl;":"\u{2A8C}","gel;":"\u{22DB}","geq;":"\u{2265}","geqq;":"\u{2267}", + "geqslant;":"\u{2A7E}","ges;":"\u{2A7E}","gescc;":"\u{2AA9}","gesdot;":"\u{2A80}", + "gesdoto;":"\u{2A82}","gesdotol;":"\u{2A84}","gesl;":"\u{22DB}\u{FE00}","gesles;":"\u{2A94}", + "Gfr;":"\u{1D50A}","gfr;":"\u{1D524}","Gg;":"\u{22D9}","gg;":"\u{226B}", + "ggg;":"\u{22D9}","gimel;":"\u{2137}","GJcy;":"\u{403}","gjcy;":"\u{453}", + "gl;":"\u{2277}","gla;":"\u{2AA5}","glE;":"\u{2A92}","glj;":"\u{2AA4}", + "gnap;":"\u{2A8A}","gnapprox;":"\u{2A8A}","gnE;":"\u{2269}","gne;":"\u{2A88}", + "gneq;":"\u{2A88}","gneqq;":"\u{2269}","gnsim;":"\u{22E7}","Gopf;":"\u{1D53E}", + "gopf;":"\u{1D558}","grave;":"\u{60}","GreaterEqual;":"\u{2265}","GreaterEqualLess;":"\u{22DB}", + "GreaterFullEqual;":"\u{2267}","GreaterGreater;":"\u{2AA2}","GreaterLess;":"\u{2277}","GreaterSlantEqual;":"\u{2A7E}", + "GreaterTilde;":"\u{2273}","Gscr;":"\u{1D4A2}","gscr;":"\u{210A}","gsim;":"\u{2273}", + "gsime;":"\u{2A8E}","gsiml;":"\u{2A90}","GT;":"\u{3E}","Gt;":"\u{226B}", + 
"gt;":"\u{3E}","gtcc;":"\u{2AA7}","gtcir;":"\u{2A7A}","gtdot;":"\u{22D7}", + "gtlPar;":"\u{2995}","gtquest;":"\u{2A7C}","gtrapprox;":"\u{2A86}","gtrarr;":"\u{2978}", + "gtrdot;":"\u{22D7}","gtreqless;":"\u{22DB}","gtreqqless;":"\u{2A8C}","gtrless;":"\u{2277}", + "gtrsim;":"\u{2273}","gvertneqq;":"\u{2269}\u{FE00}","gvnE;":"\u{2269}\u{FE00}","Hacek;":"\u{2C7}", + "hairsp;":"\u{200A}","half;":"\u{BD}","hamilt;":"\u{210B}","HARDcy;":"\u{42A}", + "hardcy;":"\u{44A}","hArr;":"\u{21D4}","harr;":"\u{2194}","harrcir;":"\u{2948}", + "harrw;":"\u{21AD}","Hat;":"\u{5E}","hbar;":"\u{210F}","Hcirc;":"\u{124}", + "hcirc;":"\u{125}","hearts;":"\u{2665}","heartsuit;":"\u{2665}","hellip;":"\u{2026}", + "hercon;":"\u{22B9}","Hfr;":"\u{210C}","hfr;":"\u{1D525}","HilbertSpace;":"\u{210B}", + "hksearow;":"\u{2925}","hkswarow;":"\u{2926}","hoarr;":"\u{21FF}","homtht;":"\u{223B}", + "hookleftarrow;":"\u{21A9}","hookrightarrow;":"\u{21AA}","Hopf;":"\u{210D}","hopf;":"\u{1D559}", + "horbar;":"\u{2015}","HorizontalLine;":"\u{2500}","Hscr;":"\u{210B}","hscr;":"\u{1D4BD}", + "hslash;":"\u{210F}","Hstrok;":"\u{126}","hstrok;":"\u{127}","HumpDownHump;":"\u{224E}", + "HumpEqual;":"\u{224F}","hybull;":"\u{2043}","hyphen;":"\u{2010}","Iacute;":"\u{CD}", + "iacute;":"\u{ED}","ic;":"\u{2063}","Icirc;":"\u{CE}","icirc;":"\u{EE}", + "Icy;":"\u{418}","icy;":"\u{438}","Idot;":"\u{130}","IEcy;":"\u{415}", + "iecy;":"\u{435}","iexcl;":"\u{A1}","iff;":"\u{21D4}","Ifr;":"\u{2111}", + "ifr;":"\u{1D526}","Igrave;":"\u{CC}","igrave;":"\u{EC}","ii;":"\u{2148}", + "iiiint;":"\u{2A0C}","iiint;":"\u{222D}","iinfin;":"\u{29DC}","iiota;":"\u{2129}", + "IJlig;":"\u{132}","ijlig;":"\u{133}","Im;":"\u{2111}","Imacr;":"\u{12A}", + "imacr;":"\u{12B}","image;":"\u{2111}","ImaginaryI;":"\u{2148}","imagline;":"\u{2110}", + "imagpart;":"\u{2111}","imath;":"\u{131}","imof;":"\u{22B7}","imped;":"\u{1B5}", + "Implies;":"\u{21D2}","in;":"\u{2208}","incare;":"\u{2105}","infin;":"\u{221E}", + 
"infintie;":"\u{29DD}","inodot;":"\u{131}","Int;":"\u{222C}","int;":"\u{222B}", + "intcal;":"\u{22BA}","integers;":"\u{2124}","Integral;":"\u{222B}","intercal;":"\u{22BA}", + "Intersection;":"\u{22C2}","intlarhk;":"\u{2A17}","intprod;":"\u{2A3C}","InvisibleComma;":"\u{2063}", + "InvisibleTimes;":"\u{2062}","IOcy;":"\u{401}","iocy;":"\u{451}","Iogon;":"\u{12E}", + "iogon;":"\u{12F}","Iopf;":"\u{1D540}","iopf;":"\u{1D55A}","Iota;":"\u{399}", + "iota;":"\u{3B9}","iprod;":"\u{2A3C}","iquest;":"\u{BF}","Iscr;":"\u{2110}", + "iscr;":"\u{1D4BE}","isin;":"\u{2208}","isindot;":"\u{22F5}","isinE;":"\u{22F9}", + "isins;":"\u{22F4}","isinsv;":"\u{22F3}","isinv;":"\u{2208}","it;":"\u{2062}", + "Itilde;":"\u{128}","itilde;":"\u{129}","Iukcy;":"\u{406}","iukcy;":"\u{456}", + "Iuml;":"\u{CF}","iuml;":"\u{EF}","Jcirc;":"\u{134}","jcirc;":"\u{135}", + "Jcy;":"\u{419}","jcy;":"\u{439}","Jfr;":"\u{1D50D}","jfr;":"\u{1D527}", + "jmath;":"\u{237}","Jopf;":"\u{1D541}","jopf;":"\u{1D55B}","Jscr;":"\u{1D4A5}", + "jscr;":"\u{1D4BF}","Jsercy;":"\u{408}","jsercy;":"\u{458}","Jukcy;":"\u{404}", + "jukcy;":"\u{454}","Kappa;":"\u{39A}","kappa;":"\u{3BA}","kappav;":"\u{3F0}", + "Kcedil;":"\u{136}","kcedil;":"\u{137}","Kcy;":"\u{41A}","kcy;":"\u{43A}", + "Kfr;":"\u{1D50E}","kfr;":"\u{1D528}","kgreen;":"\u{138}","KHcy;":"\u{425}", + "khcy;":"\u{445}","KJcy;":"\u{40C}","kjcy;":"\u{45C}","Kopf;":"\u{1D542}", + "kopf;":"\u{1D55C}","Kscr;":"\u{1D4A6}","kscr;":"\u{1D4C0}","lAarr;":"\u{21DA}", + "Lacute;":"\u{139}","lacute;":"\u{13A}","laemptyv;":"\u{29B4}","lagran;":"\u{2112}", + "Lambda;":"\u{39B}","lambda;":"\u{3BB}","Lang;":"\u{27EA}","lang;":"\u{27E8}", + "langd;":"\u{2991}","langle;":"\u{27E8}","lap;":"\u{2A85}","Laplacetrf;":"\u{2112}", + "laquo;":"\u{AB}","Larr;":"\u{219E}","lArr;":"\u{21D0}","larr;":"\u{2190}", + "larrb;":"\u{21E4}","larrbfs;":"\u{291F}","larrfs;":"\u{291D}","larrhk;":"\u{21A9}", + "larrlp;":"\u{21AB}","larrpl;":"\u{2939}","larrsim;":"\u{2973}","larrtl;":"\u{21A2}", + 
"lat;":"\u{2AAB}","lAtail;":"\u{291B}","latail;":"\u{2919}","late;":"\u{2AAD}", + "lates;":"\u{2AAD}\u{FE00}","lBarr;":"\u{290E}","lbarr;":"\u{290C}","lbbrk;":"\u{2772}", + "lbrace;":"\u{7B}","lbrack;":"\u{5B}","lbrke;":"\u{298B}","lbrksld;":"\u{298F}", + "lbrkslu;":"\u{298D}","Lcaron;":"\u{13D}","lcaron;":"\u{13E}","Lcedil;":"\u{13B}", + "lcedil;":"\u{13C}","lceil;":"\u{2308}","lcub;":"\u{7B}","Lcy;":"\u{41B}", + "lcy;":"\u{43B}","ldca;":"\u{2936}","ldquo;":"\u{201C}","ldquor;":"\u{201E}", + "ldrdhar;":"\u{2967}","ldrushar;":"\u{294B}","ldsh;":"\u{21B2}","lE;":"\u{2266}", + "le;":"\u{2264}","LeftAngleBracket;":"\u{27E8}","LeftArrow;":"\u{2190}","Leftarrow;":"\u{21D0}", + "leftarrow;":"\u{2190}","LeftArrowBar;":"\u{21E4}","LeftArrowRightArrow;":"\u{21C6}","leftarrowtail;":"\u{21A2}", + "LeftCeiling;":"\u{2308}","LeftDoubleBracket;":"\u{27E6}","LeftDownTeeVector;":"\u{2961}","LeftDownVector;":"\u{21C3}", + "LeftDownVectorBar;":"\u{2959}","LeftFloor;":"\u{230A}","leftharpoondown;":"\u{21BD}","leftharpoonup;":"\u{21BC}", + "leftleftarrows;":"\u{21C7}","LeftRightArrow;":"\u{2194}","Leftrightarrow;":"\u{21D4}","leftrightarrow;":"\u{2194}", + "leftrightarrows;":"\u{21C6}","leftrightharpoons;":"\u{21CB}","leftrightsquigarrow;":"\u{21AD}","LeftRightVector;":"\u{294E}", + "LeftTee;":"\u{22A3}","LeftTeeArrow;":"\u{21A4}","LeftTeeVector;":"\u{295A}","leftthreetimes;":"\u{22CB}", + "LeftTriangle;":"\u{22B2}","LeftTriangleBar;":"\u{29CF}","LeftTriangleEqual;":"\u{22B4}","LeftUpDownVector;":"\u{2951}", + "LeftUpTeeVector;":"\u{2960}","LeftUpVector;":"\u{21BF}","LeftUpVectorBar;":"\u{2958}","LeftVector;":"\u{21BC}", + "LeftVectorBar;":"\u{2952}","lEg;":"\u{2A8B}","leg;":"\u{22DA}","leq;":"\u{2264}", + "leqq;":"\u{2266}","leqslant;":"\u{2A7D}","les;":"\u{2A7D}","lescc;":"\u{2AA8}", + "lesdot;":"\u{2A7F}","lesdoto;":"\u{2A81}","lesdotor;":"\u{2A83}","lesg;":"\u{22DA}\u{FE00}", + "lesges;":"\u{2A93}","lessapprox;":"\u{2A85}","lessdot;":"\u{22D6}","lesseqgtr;":"\u{22DA}", + 
"lesseqqgtr;":"\u{2A8B}","LessEqualGreater;":"\u{22DA}","LessFullEqual;":"\u{2266}","LessGreater;":"\u{2276}", + "lessgtr;":"\u{2276}","LessLess;":"\u{2AA1}","lesssim;":"\u{2272}","LessSlantEqual;":"\u{2A7D}", + "LessTilde;":"\u{2272}","lfisht;":"\u{297C}","lfloor;":"\u{230A}","Lfr;":"\u{1D50F}", + "lfr;":"\u{1D529}","lg;":"\u{2276}","lgE;":"\u{2A91}","lHar;":"\u{2962}", + "lhard;":"\u{21BD}","lharu;":"\u{21BC}","lharul;":"\u{296A}","lhblk;":"\u{2584}", + "LJcy;":"\u{409}","ljcy;":"\u{459}","Ll;":"\u{22D8}","ll;":"\u{226A}", + "llarr;":"\u{21C7}","llcorner;":"\u{231E}","Lleftarrow;":"\u{21DA}","llhard;":"\u{296B}", + "lltri;":"\u{25FA}","Lmidot;":"\u{13F}","lmidot;":"\u{140}","lmoust;":"\u{23B0}", + "lmoustache;":"\u{23B0}","lnap;":"\u{2A89}","lnapprox;":"\u{2A89}","lnE;":"\u{2268}", + "lne;":"\u{2A87}","lneq;":"\u{2A87}","lneqq;":"\u{2268}","lnsim;":"\u{22E6}", + "loang;":"\u{27EC}","loarr;":"\u{21FD}","lobrk;":"\u{27E6}","LongLeftArrow;":"\u{27F5}", + "Longleftarrow;":"\u{27F8}","longleftarrow;":"\u{27F5}","LongLeftRightArrow;":"\u{27F7}","Longleftrightarrow;":"\u{27FA}", + "longleftrightarrow;":"\u{27F7}","longmapsto;":"\u{27FC}","LongRightArrow;":"\u{27F6}","Longrightarrow;":"\u{27F9}", + "longrightarrow;":"\u{27F6}","looparrowleft;":"\u{21AB}","looparrowright;":"\u{21AC}","lopar;":"\u{2985}", + "Lopf;":"\u{1D543}","lopf;":"\u{1D55D}","loplus;":"\u{2A2D}","lotimes;":"\u{2A34}", + "lowast;":"\u{2217}","lowbar;":"\u{5F}","LowerLeftArrow;":"\u{2199}","LowerRightArrow;":"\u{2198}", + "loz;":"\u{25CA}","lozenge;":"\u{25CA}","lozf;":"\u{29EB}","lpar;":"\u{28}", + "lparlt;":"\u{2993}","lrarr;":"\u{21C6}","lrcorner;":"\u{231F}","lrhar;":"\u{21CB}", + "lrhard;":"\u{296D}","lrm;":"\u{200E}","lrtri;":"\u{22BF}","lsaquo;":"\u{2039}", + "Lscr;":"\u{2112}","lscr;":"\u{1D4C1}","Lsh;":"\u{21B0}","lsh;":"\u{21B0}", + "lsim;":"\u{2272}","lsime;":"\u{2A8D}","lsimg;":"\u{2A8F}","lsqb;":"\u{5B}", + "lsquo;":"\u{2018}","lsquor;":"\u{201A}","Lstrok;":"\u{141}","lstrok;":"\u{142}", + 
"LT;":"\u{3C}","Lt;":"\u{226A}","lt;":"\u{3C}","ltcc;":"\u{2AA6}", + "ltcir;":"\u{2A79}","ltdot;":"\u{22D6}","lthree;":"\u{22CB}","ltimes;":"\u{22C9}", + "ltlarr;":"\u{2976}","ltquest;":"\u{2A7B}","ltri;":"\u{25C3}","ltrie;":"\u{22B4}", + "ltrif;":"\u{25C2}","ltrPar;":"\u{2996}","lurdshar;":"\u{294A}","luruhar;":"\u{2966}", + "lvertneqq;":"\u{2268}\u{FE00}","lvnE;":"\u{2268}\u{FE00}","macr;":"\u{AF}","male;":"\u{2642}", + "malt;":"\u{2720}","maltese;":"\u{2720}","Map;":"\u{2905}","map;":"\u{21A6}", + "mapsto;":"\u{21A6}","mapstodown;":"\u{21A7}","mapstoleft;":"\u{21A4}","mapstoup;":"\u{21A5}", + "marker;":"\u{25AE}","mcomma;":"\u{2A29}","Mcy;":"\u{41C}","mcy;":"\u{43C}", + "mdash;":"\u{2014}","mDDot;":"\u{223A}","measuredangle;":"\u{2221}","MediumSpace;":"\u{205F}", + "Mellintrf;":"\u{2133}","Mfr;":"\u{1D510}","mfr;":"\u{1D52A}","mho;":"\u{2127}", + "micro;":"\u{B5}","mid;":"\u{2223}","midast;":"\u{2A}","midcir;":"\u{2AF0}", + "middot;":"\u{B7}","minus;":"\u{2212}","minusb;":"\u{229F}","minusd;":"\u{2238}", + "minusdu;":"\u{2A2A}","MinusPlus;":"\u{2213}","mlcp;":"\u{2ADB}","mldr;":"\u{2026}" +] + +private let namedCharactersDecodeMap2: [String: Character] = [ + "mnplus;":"\u{2213}","models;":"\u{22A7}","Mopf;":"\u{1D544}","mopf;":"\u{1D55E}", + "mp;":"\u{2213}","Mscr;":"\u{2133}","mscr;":"\u{1D4C2}","mstpos;":"\u{223E}", + "Mu;":"\u{39C}","mu;":"\u{3BC}","multimap;":"\u{22B8}","mumap;":"\u{22B8}", + "nabla;":"\u{2207}","Nacute;":"\u{143}","nacute;":"\u{144}","nang;":"\u{2220}\u{20D2}", + "nap;":"\u{2249}","napE;":"\u{2A70}\u{338}","napid;":"\u{224B}\u{338}","napos;":"\u{149}", + "napprox;":"\u{2249}","natur;":"\u{266E}","natural;":"\u{266E}","naturals;":"\u{2115}", + "nbsp;":"\u{A0}","nbump;":"\u{224E}\u{338}","nbumpe;":"\u{224F}\u{338}","ncap;":"\u{2A43}", + "Ncaron;":"\u{147}","ncaron;":"\u{148}","Ncedil;":"\u{145}","ncedil;":"\u{146}", + "ncong;":"\u{2247}","ncongdot;":"\u{2A6D}\u{338}","ncup;":"\u{2A42}","Ncy;":"\u{41D}", + 
"ncy;":"\u{43D}","ndash;":"\u{2013}","ne;":"\u{2260}","nearhk;":"\u{2924}", + "neArr;":"\u{21D7}","nearr;":"\u{2197}","nearrow;":"\u{2197}","nedot;":"\u{2250}\u{338}", + "NegativeMediumSpace;":"\u{200B}","NegativeThickSpace;":"\u{200B}","NegativeThinSpace;":"\u{200B}","NegativeVeryThinSpace;":"\u{200B}", + "nequiv;":"\u{2262}","nesear;":"\u{2928}","nesim;":"\u{2242}\u{338}","NestedGreaterGreater;":"\u{226B}", + "NestedLessLess;":"\u{226A}","NewLine;":"\u{A}","nexist;":"\u{2204}","nexists;":"\u{2204}", + "Nfr;":"\u{1D511}","nfr;":"\u{1D52B}","ngE;":"\u{2267}\u{338}","nge;":"\u{2271}", + "ngeq;":"\u{2271}","ngeqq;":"\u{2267}\u{338}","ngeqslant;":"\u{2A7E}\u{338}","nges;":"\u{2A7E}\u{338}", + "nGg;":"\u{22D9}\u{338}","ngsim;":"\u{2275}","nGt;":"\u{226B}\u{20D2}","ngt;":"\u{226F}", + "ngtr;":"\u{226F}","nGtv;":"\u{226B}\u{338}","nhArr;":"\u{21CE}","nharr;":"\u{21AE}", + "nhpar;":"\u{2AF2}","ni;":"\u{220B}","nis;":"\u{22FC}","nisd;":"\u{22FA}", + "niv;":"\u{220B}","NJcy;":"\u{40A}","njcy;":"\u{45A}","nlArr;":"\u{21CD}", + "nlarr;":"\u{219A}","nldr;":"\u{2025}","nlE;":"\u{2266}\u{338}","nle;":"\u{2270}", + "nLeftarrow;":"\u{21CD}","nleftarrow;":"\u{219A}","nLeftrightarrow;":"\u{21CE}","nleftrightarrow;":"\u{21AE}", + "nleq;":"\u{2270}","nleqq;":"\u{2266}\u{338}","nleqslant;":"\u{2A7D}\u{338}","nles;":"\u{2A7D}\u{338}", + "nless;":"\u{226E}","nLl;":"\u{22D8}\u{338}","nlsim;":"\u{2274}","nLt;":"\u{226A}\u{338}", + "nlt;":"\u{226E}","nltri;":"\u{22EA}","nltrie;":"\u{22EC}","nLtv;":"\u{226A}\u{338}", + "nmid;":"\u{2224}","NoBreak;":"\u{2060}","NonBreakingSpace;":"\u{A0}","Nopf;":"\u{2115}", + "nopf;":"\u{1D55F}","Not;":"\u{2AEC}","not;":"\u{AC}","NotCongruent;":"\u{2262}", + "NotCupCap;":"\u{226D}","NotDoubleVerticalBar;":"\u{2226}","NotElement;":"\u{2209}","NotEqual;":"\u{2260}", + "NotEqualTilde;":"\u{2242}\u{338}","NotExists;":"\u{2204}","NotGreater;":"\u{226F}","NotGreaterEqual;":"\u{2271}", + 
"NotGreaterFullEqual;":"\u{2267}\u{338}","NotGreaterGreater;":"\u{226B}\u{338}","NotGreaterLess;":"\u{2279}","NotGreaterSlantEqual;":"\u{2A7E}\u{338}", + "NotGreaterTilde;":"\u{2275}","NotHumpDownHump;":"\u{224E}\u{338}","NotHumpEqual;":"\u{224F}\u{338}","notin;":"\u{2209}", + "notindot;":"\u{22F5}\u{338}","notinE;":"\u{22F9}\u{338}","notinva;":"\u{2209}","notinvb;":"\u{22F7}", + "notinvc;":"\u{22F6}","NotLeftTriangle;":"\u{22EA}","NotLeftTriangleBar;":"\u{29CF}\u{338}","NotLeftTriangleEqual;":"\u{22EC}", + "NotLess;":"\u{226E}","NotLessEqual;":"\u{2270}","NotLessGreater;":"\u{2278}","NotLessLess;":"\u{226A}\u{338}", + "NotLessSlantEqual;":"\u{2A7D}\u{338}","NotLessTilde;":"\u{2274}","NotNestedGreaterGreater;":"\u{2AA2}\u{338}","NotNestedLessLess;":"\u{2AA1}\u{338}", + "notni;":"\u{220C}","notniva;":"\u{220C}","notnivb;":"\u{22FE}","notnivc;":"\u{22FD}", + "NotPrecedes;":"\u{2280}","NotPrecedesEqual;":"\u{2AAF}\u{338}","NotPrecedesSlantEqual;":"\u{22E0}","NotReverseElement;":"\u{220C}", + "NotRightTriangle;":"\u{22EB}","NotRightTriangleBar;":"\u{29D0}\u{338}","NotRightTriangleEqual;":"\u{22ED}","NotSquareSubset;":"\u{228F}\u{338}", + "NotSquareSubsetEqual;":"\u{22E2}","NotSquareSuperset;":"\u{2290}\u{338}","NotSquareSupersetEqual;":"\u{22E3}","NotSubset;":"\u{2282}\u{20D2}", + "NotSubsetEqual;":"\u{2288}","NotSucceeds;":"\u{2281}","NotSucceedsEqual;":"\u{2AB0}\u{338}","NotSucceedsSlantEqual;":"\u{22E1}", + "NotSucceedsTilde;":"\u{227F}\u{338}","NotSuperset;":"\u{2283}\u{20D2}","NotSupersetEqual;":"\u{2289}","NotTilde;":"\u{2241}", + "NotTildeEqual;":"\u{2244}","NotTildeFullEqual;":"\u{2247}","NotTildeTilde;":"\u{2249}","NotVerticalBar;":"\u{2224}", + "npar;":"\u{2226}","nparallel;":"\u{2226}","nparsl;":"\u{2AFD}\u{20E5}","npart;":"\u{2202}\u{338}", + "npolint;":"\u{2A14}","npr;":"\u{2280}","nprcue;":"\u{22E0}","npre;":"\u{2AAF}\u{338}", + "nprec;":"\u{2280}","npreceq;":"\u{2AAF}\u{338}","nrArr;":"\u{21CF}","nrarr;":"\u{219B}", + 
"nrarrc;":"\u{2933}\u{338}","nrarrw;":"\u{219D}\u{338}","nRightarrow;":"\u{21CF}","nrightarrow;":"\u{219B}", + "nrtri;":"\u{22EB}","nrtrie;":"\u{22ED}","nsc;":"\u{2281}","nsccue;":"\u{22E1}", + "nsce;":"\u{2AB0}\u{338}","Nscr;":"\u{1D4A9}","nscr;":"\u{1D4C3}","nshortmid;":"\u{2224}", + "nshortparallel;":"\u{2226}","nsim;":"\u{2241}","nsime;":"\u{2244}","nsimeq;":"\u{2244}", + "nsmid;":"\u{2224}","nspar;":"\u{2226}","nsqsube;":"\u{22E2}","nsqsupe;":"\u{22E3}", + "nsub;":"\u{2284}","nsubE;":"\u{2AC5}\u{338}","nsube;":"\u{2288}","nsubset;":"\u{2282}\u{20D2}", + "nsubseteq;":"\u{2288}","nsubseteqq;":"\u{2AC5}\u{338}","nsucc;":"\u{2281}","nsucceq;":"\u{2AB0}\u{338}", + "nsup;":"\u{2285}","nsupE;":"\u{2AC6}\u{338}","nsupe;":"\u{2289}","nsupset;":"\u{2283}\u{20D2}", + "nsupseteq;":"\u{2289}","nsupseteqq;":"\u{2AC6}\u{338}","ntgl;":"\u{2279}","Ntilde;":"\u{D1}", + "ntilde;":"\u{F1}","ntlg;":"\u{2278}","ntriangleleft;":"\u{22EA}","ntrianglelefteq;":"\u{22EC}", + "ntriangleright;":"\u{22EB}","ntrianglerighteq;":"\u{22ED}","Nu;":"\u{39D}","nu;":"\u{3BD}", + "num;":"\u{23}","numero;":"\u{2116}","numsp;":"\u{2007}","nvap;":"\u{224D}\u{20D2}", + "nVDash;":"\u{22AF}","nVdash;":"\u{22AE}","nvDash;":"\u{22AD}","nvdash;":"\u{22AC}", + "nvge;":"\u{2265}\u{20D2}","nvgt;":"\u{3E}\u{20D2}","nvHarr;":"\u{2904}","nvinfin;":"\u{29DE}", + "nvlArr;":"\u{2902}","nvle;":"\u{2264}\u{20D2}","nvlt;":"\u{3C}\u{20D2}","nvltrie;":"\u{22B4}\u{20D2}", + "nvrArr;":"\u{2903}","nvrtrie;":"\u{22B5}\u{20D2}","nvsim;":"\u{223C}\u{20D2}","nwarhk;":"\u{2923}", + "nwArr;":"\u{21D6}","nwarr;":"\u{2196}","nwarrow;":"\u{2196}","nwnear;":"\u{2927}", + "Oacute;":"\u{D3}","oacute;":"\u{F3}","oast;":"\u{229B}","ocir;":"\u{229A}", + "Ocirc;":"\u{D4}","ocirc;":"\u{F4}","Ocy;":"\u{41E}","ocy;":"\u{43E}", + "odash;":"\u{229D}","Odblac;":"\u{150}","odblac;":"\u{151}","odiv;":"\u{2A38}", + "odot;":"\u{2299}","odsold;":"\u{29BC}","OElig;":"\u{152}","oelig;":"\u{153}", + 
"ofcir;":"\u{29BF}","Ofr;":"\u{1D512}","ofr;":"\u{1D52C}","ogon;":"\u{2DB}", + "Ograve;":"\u{D2}","ograve;":"\u{F2}","ogt;":"\u{29C1}","ohbar;":"\u{29B5}", + "ohm;":"\u{3A9}","oint;":"\u{222E}","olarr;":"\u{21BA}","olcir;":"\u{29BE}", + "olcross;":"\u{29BB}","oline;":"\u{203E}","olt;":"\u{29C0}","Omacr;":"\u{14C}", + "omacr;":"\u{14D}","Omega;":"\u{3A9}","omega;":"\u{3C9}","Omicron;":"\u{39F}", + "omicron;":"\u{3BF}","omid;":"\u{29B6}","ominus;":"\u{2296}","Oopf;":"\u{1D546}", + "oopf;":"\u{1D560}","opar;":"\u{29B7}","OpenCurlyDoubleQuote;":"\u{201C}","OpenCurlyQuote;":"\u{2018}", + "operp;":"\u{29B9}","oplus;":"\u{2295}","Or;":"\u{2A54}","or;":"\u{2228}", + "orarr;":"\u{21BB}","ord;":"\u{2A5D}","order;":"\u{2134}","orderof;":"\u{2134}", + "ordf;":"\u{AA}","ordm;":"\u{BA}","origof;":"\u{22B6}","oror;":"\u{2A56}", + "orslope;":"\u{2A57}","orv;":"\u{2A5B}","oS;":"\u{24C8}","Oscr;":"\u{1D4AA}", + "oscr;":"\u{2134}","Oslash;":"\u{D8}","oslash;":"\u{F8}","osol;":"\u{2298}", + "Otilde;":"\u{D5}","otilde;":"\u{F5}","Otimes;":"\u{2A37}","otimes;":"\u{2297}", + "otimesas;":"\u{2A36}","Ouml;":"\u{D6}","ouml;":"\u{F6}","ovbar;":"\u{233D}", + "OverBar;":"\u{203E}","OverBrace;":"\u{23DE}","OverBracket;":"\u{23B4}","OverParenthesis;":"\u{23DC}", + "par;":"\u{2225}","para;":"\u{B6}","parallel;":"\u{2225}","parsim;":"\u{2AF3}", + "parsl;":"\u{2AFD}","part;":"\u{2202}","PartialD;":"\u{2202}","Pcy;":"\u{41F}", + "pcy;":"\u{43F}","percnt;":"\u{25}","period;":"\u{2E}","permil;":"\u{2030}", + "perp;":"\u{22A5}","pertenk;":"\u{2031}","Pfr;":"\u{1D513}","pfr;":"\u{1D52D}", + "Phi;":"\u{3A6}","phi;":"\u{3C6}","phiv;":"\u{3D5}","phmmat;":"\u{2133}", + "phone;":"\u{260E}","Pi;":"\u{3A0}","pi;":"\u{3C0}","pitchfork;":"\u{22D4}", + "piv;":"\u{3D6}","planck;":"\u{210F}","planckh;":"\u{210E}","plankv;":"\u{210F}", + "plus;":"\u{2B}","plusacir;":"\u{2A23}","plusb;":"\u{229E}","pluscir;":"\u{2A22}", + "plusdo;":"\u{2214}","plusdu;":"\u{2A25}","pluse;":"\u{2A72}","PlusMinus;":"\u{B1}", + 
"plusmn;":"\u{B1}","plussim;":"\u{2A26}","plustwo;":"\u{2A27}","pm;":"\u{B1}", + "Poincareplane;":"\u{210C}","pointint;":"\u{2A15}","Popf;":"\u{2119}","popf;":"\u{1D561}", + "pound;":"\u{A3}","Pr;":"\u{2ABB}","pr;":"\u{227A}","prap;":"\u{2AB7}", + "prcue;":"\u{227C}","prE;":"\u{2AB3}","pre;":"\u{2AAF}","prec;":"\u{227A}", + "precapprox;":"\u{2AB7}","preccurlyeq;":"\u{227C}","Precedes;":"\u{227A}","PrecedesEqual;":"\u{2AAF}", + "PrecedesSlantEqual;":"\u{227C}","PrecedesTilde;":"\u{227E}","preceq;":"\u{2AAF}","precnapprox;":"\u{2AB9}", + "precneqq;":"\u{2AB5}","precnsim;":"\u{22E8}","precsim;":"\u{227E}","Prime;":"\u{2033}", + "prime;":"\u{2032}","primes;":"\u{2119}","prnap;":"\u{2AB9}","prnE;":"\u{2AB5}", + "prnsim;":"\u{22E8}","prod;":"\u{220F}","Product;":"\u{220F}","profalar;":"\u{232E}", + "profline;":"\u{2312}","profsurf;":"\u{2313}","prop;":"\u{221D}","Proportion;":"\u{2237}", + "Proportional;":"\u{221D}","propto;":"\u{221D}","prsim;":"\u{227E}","prurel;":"\u{22B0}", + "Pscr;":"\u{1D4AB}","pscr;":"\u{1D4C5}","Psi;":"\u{3A8}","psi;":"\u{3C8}", + "puncsp;":"\u{2008}","Qfr;":"\u{1D514}","qfr;":"\u{1D52E}","qint;":"\u{2A0C}", + "Qopf;":"\u{211A}","qopf;":"\u{1D562}","qprime;":"\u{2057}","Qscr;":"\u{1D4AC}", + "qscr;":"\u{1D4C6}","quaternions;":"\u{210D}","quatint;":"\u{2A16}","quest;":"\u{3F}", + "questeq;":"\u{225F}","QUOT;":"\u{22}","quot;":"\u{22}","rAarr;":"\u{21DB}", + "race;":"\u{223D}\u{331}","Racute;":"\u{154}","racute;":"\u{155}","radic;":"\u{221A}", + "raemptyv;":"\u{29B3}","Rang;":"\u{27EB}","rang;":"\u{27E9}","rangd;":"\u{2992}", + "range;":"\u{29A5}","rangle;":"\u{27E9}","raquo;":"\u{BB}","Rarr;":"\u{21A0}", + "rArr;":"\u{21D2}","rarr;":"\u{2192}","rarrap;":"\u{2975}","rarrb;":"\u{21E5}", + "rarrbfs;":"\u{2920}","rarrc;":"\u{2933}","rarrfs;":"\u{291E}","rarrhk;":"\u{21AA}", + "rarrlp;":"\u{21AC}","rarrpl;":"\u{2945}","rarrsim;":"\u{2974}","Rarrtl;":"\u{2916}", + "rarrtl;":"\u{21A3}","rarrw;":"\u{219D}","rAtail;":"\u{291C}","ratail;":"\u{291A}", + 
"ratio;":"\u{2236}","rationals;":"\u{211A}","RBarr;":"\u{2910}","rBarr;":"\u{290F}", + "rbarr;":"\u{290D}","rbbrk;":"\u{2773}","rbrace;":"\u{7D}","rbrack;":"\u{5D}", + "rbrke;":"\u{298C}","rbrksld;":"\u{298E}","rbrkslu;":"\u{2990}","Rcaron;":"\u{158}", + "rcaron;":"\u{159}","Rcedil;":"\u{156}","rcedil;":"\u{157}","rceil;":"\u{2309}", + "rcub;":"\u{7D}","Rcy;":"\u{420}","rcy;":"\u{440}","rdca;":"\u{2937}", + "rdldhar;":"\u{2969}","rdquo;":"\u{201D}","rdquor;":"\u{201D}","rdsh;":"\u{21B3}", + "Re;":"\u{211C}","real;":"\u{211C}","realine;":"\u{211B}","realpart;":"\u{211C}", + "reals;":"\u{211D}","rect;":"\u{25AD}","REG;":"\u{AE}","reg;":"\u{AE}", + "ReverseElement;":"\u{220B}","ReverseEquilibrium;":"\u{21CB}","ReverseUpEquilibrium;":"\u{296F}","rfisht;":"\u{297D}", + "rfloor;":"\u{230B}","Rfr;":"\u{211C}","rfr;":"\u{1D52F}","rHar;":"\u{2964}", + "rhard;":"\u{21C1}","rharu;":"\u{21C0}","rharul;":"\u{296C}","Rho;":"\u{3A1}", + "rho;":"\u{3C1}","rhov;":"\u{3F1}","RightAngleBracket;":"\u{27E9}","RightArrow;":"\u{2192}", + "Rightarrow;":"\u{21D2}","rightarrow;":"\u{2192}","RightArrowBar;":"\u{21E5}","RightArrowLeftArrow;":"\u{21C4}", + "rightarrowtail;":"\u{21A3}","RightCeiling;":"\u{2309}","RightDoubleBracket;":"\u{27E7}","RightDownTeeVector;":"\u{295D}", + "RightDownVector;":"\u{21C2}","RightDownVectorBar;":"\u{2955}","RightFloor;":"\u{230B}","rightharpoondown;":"\u{21C1}", + "rightharpoonup;":"\u{21C0}","rightleftarrows;":"\u{21C4}","rightleftharpoons;":"\u{21CC}","rightrightarrows;":"\u{21C9}", + "rightsquigarrow;":"\u{219D}","RightTee;":"\u{22A2}","RightTeeArrow;":"\u{21A6}","RightTeeVector;":"\u{295B}", + "rightthreetimes;":"\u{22CC}","RightTriangle;":"\u{22B3}","RightTriangleBar;":"\u{29D0}","RightTriangleEqual;":"\u{22B5}", + "RightUpDownVector;":"\u{294F}","RightUpTeeVector;":"\u{295C}","RightUpVector;":"\u{21BE}","RightUpVectorBar;":"\u{2954}", + "RightVector;":"\u{21C0}","RightVectorBar;":"\u{2953}","ring;":"\u{2DA}","risingdotseq;":"\u{2253}", + 
"rlarr;":"\u{21C4}","rlhar;":"\u{21CC}","rlm;":"\u{200F}","rmoust;":"\u{23B1}", + "rmoustache;":"\u{23B1}","rnmid;":"\u{2AEE}","roang;":"\u{27ED}","roarr;":"\u{21FE}", + "robrk;":"\u{27E7}","ropar;":"\u{2986}","Ropf;":"\u{211D}","ropf;":"\u{1D563}", + "roplus;":"\u{2A2E}","rotimes;":"\u{2A35}","RoundImplies;":"\u{2970}","rpar;":"\u{29}", + "rpargt;":"\u{2994}","rppolint;":"\u{2A12}","rrarr;":"\u{21C9}","Rrightarrow;":"\u{21DB}", + "rsaquo;":"\u{203A}","Rscr;":"\u{211B}","rscr;":"\u{1D4C7}","Rsh;":"\u{21B1}", + "rsh;":"\u{21B1}","rsqb;":"\u{5D}","rsquo;":"\u{2019}","rsquor;":"\u{2019}", + "rthree;":"\u{22CC}","rtimes;":"\u{22CA}","rtri;":"\u{25B9}","rtrie;":"\u{22B5}", + "rtrif;":"\u{25B8}","rtriltri;":"\u{29CE}","RuleDelayed;":"\u{29F4}","ruluhar;":"\u{2968}", + "rx;":"\u{211E}","Sacute;":"\u{15A}","sacute;":"\u{15B}","sbquo;":"\u{201A}", + "Sc;":"\u{2ABC}","sc;":"\u{227B}","scap;":"\u{2AB8}","Scaron;":"\u{160}", + "scaron;":"\u{161}","sccue;":"\u{227D}","scE;":"\u{2AB4}","sce;":"\u{2AB0}", + "Scedil;":"\u{15E}","scedil;":"\u{15F}","Scirc;":"\u{15C}","scirc;":"\u{15D}", + "scnap;":"\u{2ABA}","scnE;":"\u{2AB6}","scnsim;":"\u{22E9}","scpolint;":"\u{2A13}", + "scsim;":"\u{227F}","Scy;":"\u{421}","scy;":"\u{441}","sdot;":"\u{22C5}", + "sdotb;":"\u{22A1}","sdote;":"\u{2A66}","searhk;":"\u{2925}","seArr;":"\u{21D8}", + "searr;":"\u{2198}","searrow;":"\u{2198}","sect;":"\u{A7}","semi;":"\u{3B}", + "seswar;":"\u{2929}","setminus;":"\u{2216}","setmn;":"\u{2216}","sext;":"\u{2736}", + "Sfr;":"\u{1D516}","sfr;":"\u{1D530}","sfrown;":"\u{2322}","sharp;":"\u{266F}", + "SHCHcy;":"\u{429}","shchcy;":"\u{449}","SHcy;":"\u{428}","shcy;":"\u{448}", + "ShortDownArrow;":"\u{2193}","ShortLeftArrow;":"\u{2190}","shortmid;":"\u{2223}","shortparallel;":"\u{2225}", + "ShortRightArrow;":"\u{2192}","ShortUpArrow;":"\u{2191}","shy;":"\u{AD}","Sigma;":"\u{3A3}", + "sigma;":"\u{3C3}","sigmaf;":"\u{3C2}","sigmav;":"\u{3C2}","sim;":"\u{223C}", + 
"simdot;":"\u{2A6A}","sime;":"\u{2243}","simeq;":"\u{2243}","simg;":"\u{2A9E}", + "simgE;":"\u{2AA0}","siml;":"\u{2A9D}","simlE;":"\u{2A9F}","simne;":"\u{2246}", + "simplus;":"\u{2A24}","simrarr;":"\u{2972}","slarr;":"\u{2190}","SmallCircle;":"\u{2218}", + "smallsetminus;":"\u{2216}","smashp;":"\u{2A33}","smeparsl;":"\u{29E4}","smid;":"\u{2223}", + "smile;":"\u{2323}","smt;":"\u{2AAA}","smte;":"\u{2AAC}","smtes;":"\u{2AAC}\u{FE00}", + "SOFTcy;":"\u{42C}","softcy;":"\u{44C}","sol;":"\u{2F}","solb;":"\u{29C4}", + "solbar;":"\u{233F}","Sopf;":"\u{1D54A}","sopf;":"\u{1D564}","spades;":"\u{2660}", + "spadesuit;":"\u{2660}","spar;":"\u{2225}","sqcap;":"\u{2293}","sqcaps;":"\u{2293}\u{FE00}", + "sqcup;":"\u{2294}","sqcups;":"\u{2294}\u{FE00}","Sqrt;":"\u{221A}","sqsub;":"\u{228F}", + "sqsube;":"\u{2291}","sqsubset;":"\u{228F}","sqsubseteq;":"\u{2291}","sqsup;":"\u{2290}", + "sqsupe;":"\u{2292}","sqsupset;":"\u{2290}","sqsupseteq;":"\u{2292}","squ;":"\u{25A1}", + "Square;":"\u{25A1}","square;":"\u{25A1}","SquareIntersection;":"\u{2293}","SquareSubset;":"\u{228F}", + "SquareSubsetEqual;":"\u{2291}","SquareSuperset;":"\u{2290}","SquareSupersetEqual;":"\u{2292}","SquareUnion;":"\u{2294}", + "squarf;":"\u{25AA}","squf;":"\u{25AA}","srarr;":"\u{2192}","Sscr;":"\u{1D4AE}", + "sscr;":"\u{1D4C8}","ssetmn;":"\u{2216}","ssmile;":"\u{2323}","sstarf;":"\u{22C6}", + "Star;":"\u{22C6}","star;":"\u{2606}","starf;":"\u{2605}","straightepsilon;":"\u{3F5}", + "straightphi;":"\u{3D5}","strns;":"\u{AF}","Sub;":"\u{22D0}","sub;":"\u{2282}", + "subdot;":"\u{2ABD}","subE;":"\u{2AC5}","sube;":"\u{2286}","subedot;":"\u{2AC3}", + "submult;":"\u{2AC1}","subnE;":"\u{2ACB}","subne;":"\u{228A}","subplus;":"\u{2ABF}", + "subrarr;":"\u{2979}","Subset;":"\u{22D0}","subset;":"\u{2282}","subseteq;":"\u{2286}", + "subseteqq;":"\u{2AC5}","SubsetEqual;":"\u{2286}","subsetneq;":"\u{228A}","subsetneqq;":"\u{2ACB}", + "subsim;":"\u{2AC7}","subsub;":"\u{2AD5}","subsup;":"\u{2AD3}","succ;":"\u{227B}", + 
"succapprox;":"\u{2AB8}","succcurlyeq;":"\u{227D}","Succeeds;":"\u{227B}","SucceedsEqual;":"\u{2AB0}", + "SucceedsSlantEqual;":"\u{227D}","SucceedsTilde;":"\u{227F}","succeq;":"\u{2AB0}","succnapprox;":"\u{2ABA}", + "succneqq;":"\u{2AB6}","succnsim;":"\u{22E9}","succsim;":"\u{227F}","SuchThat;":"\u{220B}", + "Sum;":"\u{2211}","sum;":"\u{2211}","sung;":"\u{266A}","Sup;":"\u{22D1}", + "sup;":"\u{2283}","sup1;":"\u{B9}","sup2;":"\u{B2}","sup3;":"\u{B3}", + "supdot;":"\u{2ABE}","supdsub;":"\u{2AD8}","supE;":"\u{2AC6}","supe;":"\u{2287}", + "supedot;":"\u{2AC4}","Superset;":"\u{2283}","SupersetEqual;":"\u{2287}","suphsol;":"\u{27C9}", + "suphsub;":"\u{2AD7}","suplarr;":"\u{297B}","supmult;":"\u{2AC2}","supnE;":"\u{2ACC}", + "supne;":"\u{228B}","supplus;":"\u{2AC0}","Supset;":"\u{22D1}","supset;":"\u{2283}", + "supseteq;":"\u{2287}","supseteqq;":"\u{2AC6}","supsetneq;":"\u{228B}","supsetneqq;":"\u{2ACC}", + "supsim;":"\u{2AC8}","supsub;":"\u{2AD4}","supsup;":"\u{2AD6}","swarhk;":"\u{2926}", + "swArr;":"\u{21D9}","swarr;":"\u{2199}","swarrow;":"\u{2199}","swnwar;":"\u{292A}", + "szlig;":"\u{DF}","Tab;":"\u{9}","target;":"\u{2316}","Tau;":"\u{3A4}", + "tau;":"\u{3C4}","tbrk;":"\u{23B4}","Tcaron;":"\u{164}","tcaron;":"\u{165}", + "Tcedil;":"\u{162}","tcedil;":"\u{163}","Tcy;":"\u{422}","tcy;":"\u{442}", + "tdot;":"\u{20DB}","telrec;":"\u{2315}","Tfr;":"\u{1D517}","tfr;":"\u{1D531}", + "there4;":"\u{2234}","Therefore;":"\u{2234}","therefore;":"\u{2234}","Theta;":"\u{398}", + "theta;":"\u{3B8}","thetasym;":"\u{3D1}","thetav;":"\u{3D1}","thickapprox;":"\u{2248}", + "thicksim;":"\u{223C}", + + // Skip "ThickSpace;" due to Swift not recognizing it as a single grapheme cluster + // "ThickSpace;":"\u{205F}\u{200A}", + + "thinsp;":"\u{2009}","ThinSpace;":"\u{2009}","thkap;":"\u{2248}","thksim;":"\u{223C}", + "THORN;":"\u{DE}","thorn;":"\u{FE}","Tilde;":"\u{223C}","tilde;":"\u{2DC}", + "TildeEqual;":"\u{2243}","TildeFullEqual;":"\u{2245}","TildeTilde;":"\u{2248}","times;":"\u{D7}", 
+ "timesb;":"\u{22A0}","timesbar;":"\u{2A31}","timesd;":"\u{2A30}","tint;":"\u{222D}", + "toea;":"\u{2928}","top;":"\u{22A4}","topbot;":"\u{2336}","topcir;":"\u{2AF1}", + "Topf;":"\u{1D54B}","topf;":"\u{1D565}","topfork;":"\u{2ADA}","tosa;":"\u{2929}", + "tprime;":"\u{2034}","TRADE;":"\u{2122}","trade;":"\u{2122}","triangle;":"\u{25B5}", + "triangledown;":"\u{25BF}","triangleleft;":"\u{25C3}","trianglelefteq;":"\u{22B4}","triangleq;":"\u{225C}", + "triangleright;":"\u{25B9}","trianglerighteq;":"\u{22B5}","tridot;":"\u{25EC}","trie;":"\u{225C}", + "triminus;":"\u{2A3A}","TripleDot;":"\u{20DB}","triplus;":"\u{2A39}","trisb;":"\u{29CD}", + "tritime;":"\u{2A3B}","trpezium;":"\u{23E2}","Tscr;":"\u{1D4AF}","tscr;":"\u{1D4C9}", + "TScy;":"\u{426}","tscy;":"\u{446}","TSHcy;":"\u{40B}","tshcy;":"\u{45B}", + "Tstrok;":"\u{166}","tstrok;":"\u{167}","twixt;":"\u{226C}","twoheadleftarrow;":"\u{219E}", + "twoheadrightarrow;":"\u{21A0}","Uacute;":"\u{DA}","uacute;":"\u{FA}","Uarr;":"\u{219F}", + "uArr;":"\u{21D1}","uarr;":"\u{2191}","Uarrocir;":"\u{2949}","Ubrcy;":"\u{40E}", + "ubrcy;":"\u{45E}","Ubreve;":"\u{16C}","ubreve;":"\u{16D}","Ucirc;":"\u{DB}", + "ucirc;":"\u{FB}","Ucy;":"\u{423}","ucy;":"\u{443}","udarr;":"\u{21C5}", + "Udblac;":"\u{170}","udblac;":"\u{171}","udhar;":"\u{296E}","ufisht;":"\u{297E}", + "Ufr;":"\u{1D518}","ufr;":"\u{1D532}","Ugrave;":"\u{D9}","ugrave;":"\u{F9}", + "uHar;":"\u{2963}","uharl;":"\u{21BF}","uharr;":"\u{21BE}","uhblk;":"\u{2580}", + "ulcorn;":"\u{231C}","ulcorner;":"\u{231C}","ulcrop;":"\u{230F}","ultri;":"\u{25F8}", + "Umacr;":"\u{16A}","umacr;":"\u{16B}","uml;":"\u{A8}","UnderBar;":"\u{5F}", + "UnderBrace;":"\u{23DF}","UnderBracket;":"\u{23B5}","UnderParenthesis;":"\u{23DD}","Union;":"\u{22C3}", + "UnionPlus;":"\u{228E}","Uogon;":"\u{172}","uogon;":"\u{173}","Uopf;":"\u{1D54C}", + "uopf;":"\u{1D566}","UpArrow;":"\u{2191}","Uparrow;":"\u{21D1}","uparrow;":"\u{2191}", + 
"UpArrowBar;":"\u{2912}","UpArrowDownArrow;":"\u{21C5}","UpDownArrow;":"\u{2195}","Updownarrow;":"\u{21D5}", + "updownarrow;":"\u{2195}","UpEquilibrium;":"\u{296E}","upharpoonleft;":"\u{21BF}","upharpoonright;":"\u{21BE}", + "uplus;":"\u{228E}","UpperLeftArrow;":"\u{2196}","UpperRightArrow;":"\u{2197}","Upsi;":"\u{3D2}", + "upsi;":"\u{3C5}","upsih;":"\u{3D2}","Upsilon;":"\u{3A5}","upsilon;":"\u{3C5}", + "UpTee;":"\u{22A5}","UpTeeArrow;":"\u{21A5}","upuparrows;":"\u{21C8}","urcorn;":"\u{231D}", + "urcorner;":"\u{231D}","urcrop;":"\u{230E}","Uring;":"\u{16E}","uring;":"\u{16F}", + "urtri;":"\u{25F9}","Uscr;":"\u{1D4B0}","uscr;":"\u{1D4CA}","utdot;":"\u{22F0}", + "Utilde;":"\u{168}","utilde;":"\u{169}","utri;":"\u{25B5}","utrif;":"\u{25B4}", + "uuarr;":"\u{21C8}","Uuml;":"\u{DC}","uuml;":"\u{FC}","uwangle;":"\u{29A7}", + "vangrt;":"\u{299C}","varepsilon;":"\u{3F5}","varkappa;":"\u{3F0}","varnothing;":"\u{2205}", + "varphi;":"\u{3D5}","varpi;":"\u{3D6}","varpropto;":"\u{221D}","vArr;":"\u{21D5}", + "varr;":"\u{2195}","varrho;":"\u{3F1}","varsigma;":"\u{3C2}","varsubsetneq;":"\u{228A}\u{FE00}", + "varsubsetneqq;":"\u{2ACB}\u{FE00}","varsupsetneq;":"\u{228B}\u{FE00}","varsupsetneqq;":"\u{2ACC}\u{FE00}","vartheta;":"\u{3D1}", + "vartriangleleft;":"\u{22B2}","vartriangleright;":"\u{22B3}","Vbar;":"\u{2AEB}","vBar;":"\u{2AE8}", + "vBarv;":"\u{2AE9}","Vcy;":"\u{412}","vcy;":"\u{432}","VDash;":"\u{22AB}", + "Vdash;":"\u{22A9}","vDash;":"\u{22A8}","vdash;":"\u{22A2}","Vdashl;":"\u{2AE6}", + "Vee;":"\u{22C1}","vee;":"\u{2228}","veebar;":"\u{22BB}","veeeq;":"\u{225A}", + "vellip;":"\u{22EE}","Verbar;":"\u{2016}","verbar;":"\u{7C}","Vert;":"\u{2016}", + "vert;":"\u{7C}","VerticalBar;":"\u{2223}","VerticalLine;":"\u{7C}","VerticalSeparator;":"\u{2758}", + "VerticalTilde;":"\u{2240}","VeryThinSpace;":"\u{200A}","Vfr;":"\u{1D519}","vfr;":"\u{1D533}", + "vltri;":"\u{22B2}","vnsub;":"\u{2282}\u{20D2}","vnsup;":"\u{2283}\u{20D2}","Vopf;":"\u{1D54D}", + 
"vopf;":"\u{1D567}","vprop;":"\u{221D}","vrtri;":"\u{22B3}","Vscr;":"\u{1D4B1}", + "vscr;":"\u{1D4CB}","vsubnE;":"\u{2ACB}\u{FE00}","vsubne;":"\u{228A}\u{FE00}","vsupnE;":"\u{2ACC}\u{FE00}", + "vsupne;":"\u{228B}\u{FE00}","Vvdash;":"\u{22AA}","vzigzag;":"\u{299A}","Wcirc;":"\u{174}", + "wcirc;":"\u{175}","wedbar;":"\u{2A5F}","Wedge;":"\u{22C0}","wedge;":"\u{2227}", + "wedgeq;":"\u{2259}","weierp;":"\u{2118}","Wfr;":"\u{1D51A}","wfr;":"\u{1D534}", + "Wopf;":"\u{1D54E}","wopf;":"\u{1D568}","wp;":"\u{2118}","wr;":"\u{2240}", + "wreath;":"\u{2240}","Wscr;":"\u{1D4B2}","wscr;":"\u{1D4CC}","xcap;":"\u{22C2}", + "xcirc;":"\u{25EF}","xcup;":"\u{22C3}","xdtri;":"\u{25BD}","Xfr;":"\u{1D51B}", + "xfr;":"\u{1D535}","xhArr;":"\u{27FA}","xharr;":"\u{27F7}","Xi;":"\u{39E}", + "xi;":"\u{3BE}","xlArr;":"\u{27F8}","xlarr;":"\u{27F5}","xmap;":"\u{27FC}", + "xnis;":"\u{22FB}","xodot;":"\u{2A00}","Xopf;":"\u{1D54F}","xopf;":"\u{1D569}", + "xoplus;":"\u{2A01}","xotime;":"\u{2A02}","xrArr;":"\u{27F9}","xrarr;":"\u{27F6}", + "Xscr;":"\u{1D4B3}","xscr;":"\u{1D4CD}","xsqcup;":"\u{2A06}","xuplus;":"\u{2A04}", + "xutri;":"\u{25B3}","xvee;":"\u{22C1}","xwedge;":"\u{22C0}","Yacute;":"\u{DD}", + "yacute;":"\u{FD}","YAcy;":"\u{42F}","yacy;":"\u{44F}","Ycirc;":"\u{176}", + "ycirc;":"\u{177}","Ycy;":"\u{42B}","ycy;":"\u{44B}","yen;":"\u{A5}", + "Yfr;":"\u{1D51C}","yfr;":"\u{1D536}","YIcy;":"\u{407}","yicy;":"\u{457}", + "Yopf;":"\u{1D550}","yopf;":"\u{1D56A}","Yscr;":"\u{1D4B4}","yscr;":"\u{1D4CE}", + "YUcy;":"\u{42E}","yucy;":"\u{44E}","Yuml;":"\u{178}","yuml;":"\u{FF}", + "Zacute;":"\u{179}","zacute;":"\u{17A}","Zcaron;":"\u{17D}","zcaron;":"\u{17E}", + "Zcy;":"\u{417}","zcy;":"\u{437}","Zdot;":"\u{17B}","zdot;":"\u{17C}", + "zeetrf;":"\u{2128}","ZeroWidthSpace;":"\u{200B}","Zeta;":"\u{396}","zeta;":"\u{3B6}", + "Zfr;":"\u{2128}","zfr;":"\u{1D537}","ZHcy;":"\u{416}","zhcy;":"\u{436}", + "zigrarr;":"\u{21DD}","Zopf;":"\u{2124}","zopf;":"\u{1D56B}","Zscr;":"\u{1D4B5}", + 
"zscr;":"\u{1D4CF}","zwj;":"\u{200D}","zwnj;":"\u{200C}" +] diff --git a/Sources/HTMLStreamer/HTMLParser.swift b/Sources/HTMLStreamer/HTMLParser.swift new file mode 100644 index 0000000..3bbe09d --- /dev/null +++ b/Sources/HTMLStreamer/HTMLParser.swift @@ -0,0 +1,12 @@ +// +// HTMLParser.swift +// HTMLStreamer +// +// Created by Shadowfacts on 11/22/23. +// + +import Foundation + +struct HTMLParser { + +} diff --git a/Sources/HTMLStreamer/InlineArray3.swift b/Sources/HTMLStreamer/InlineArray3.swift new file mode 100644 index 0000000..d579649 --- /dev/null +++ b/Sources/HTMLStreamer/InlineArray3.swift @@ -0,0 +1,220 @@ +// +// InlineArray3.swift +// HTMLStreamer +// +// Created by Shadowfacts on 11/19/23. +// + +import Foundation + +/// An array with inline space for up to 3 elements. +/// +/// If the array grows beyond 3 elements, it will be stored out-of-line. +/// Once that happens, the array will never return to being stored inline, +/// since the allocation cost has already been paid. +struct InlineArray3 { + private var storage: Storage + + init() { + self.storage = .inline(nil, nil, nil) + } +} + +extension InlineArray3 { + fileprivate enum Storage { + case inline(Element?, Element?, Element?) + case array(ContiguousArray) + } +} + +extension InlineArray3: ExpressibleByArrayLiteral { + init(arrayLiteral elements: Element...) 
{
        // Up to three elements are stored inline; anything longer goes out-of-line.
        switch elements.count {
        case 0:
            self.storage = .inline(nil, nil, nil)
        case 1:
            self.storage = .inline(elements[0], nil, nil)
        case 2:
            self.storage = .inline(elements[0], elements[1], nil)
        case 3:
            self.storage = .inline(elements[0], elements[1], elements[2])
        default:
            self.storage = .array(.init(elements))
        }
    }
}

extension InlineArray3: MutableCollection {
    typealias Element = E
    typealias Index = Int
    typealias Indices = Range<Int>

    /// Accesses the element at `position`.
    ///
    /// Traps if `position` is not less than `endIndex`.
    subscript(position: Int) -> Element {
        _read {
            precondition(position < endIndex)
            switch storage {
            case .inline(let a, let b, let c):
                // The precondition above guarantees the selected slot is occupied.
                switch position {
                case 0:
                    yield a.unsafelyUnwrapped
                case 1:
                    yield b.unsafelyUnwrapped
                case 2:
                    yield c.unsafelyUnwrapped
                default:
                    fatalError("unreachable")
                }
            case .array(let arr):
                yield arr[position]
            }
        }
        _modify {
            precondition(position < endIndex)
            switch storage {
            case .inline(let a, let b, let c):
                // The pattern binds copies, so every branch has to store the
                // mutated value back into `storage` after the yield.
                switch position {
                case 0:
                    var newValue = a.unsafelyUnwrapped
                    yield &newValue
                    storage = .inline(newValue, b, c)
                case 1:
                    var newValue = b.unsafelyUnwrapped
                    yield &newValue
                    storage = .inline(a, newValue, c)
                case 2:
                    var newValue = c.unsafelyUnwrapped
                    yield &newValue
                    storage = .inline(a, b, newValue)
                default:
                    fatalError("unreachable")
                }
            case .array(var arr):
                yield &arr[position]
                // `arr` is a copy bound by the pattern; without this write-back the
                // caller's mutation would be discarded.
                storage = .array(arr)
            }
        }
    }

    var startIndex: Int {
        0
    }

    /// The number of stored elements: the count of leading non-nil inline slots,
    /// or the out-of-line array's `endIndex`.
    var endIndex: Int {
        switch storage {
        case .inline(let a, let b, let c):
            a == nil ? 0 : b == nil ? 1 : c == nil ?
2 : 3
        case .array(let arr):
            arr.endIndex
        }
    }
}

extension InlineArray3: BidirectionalCollection {
}

extension InlineArray3: RandomAccessCollection {
}

extension InlineArray3: RangeReplaceableCollection {
    /// Replaces the elements in `subrange` with `newElements`.
    ///
    /// The result stays inline when it has at most 3 elements and the array is
    /// currently inline; otherwise the contents move to (or stay in) an
    /// out-of-line `ContiguousArray`.
    ///
    /// Traps if `subrange` is not within `0..<count`.
    mutating func replaceSubrange<C>(_ subrange: Range<Int>, with newElements: C) where C: Collection, Element == C.Element {
        switch storage {
        case .array(var arr):
            arr.replaceSubrange(subrange, with: newElements)
            storage = .array(arr)
        case .inline(var a, var b, var c):
            // The slot shuffling below assumes a valid subrange; an out-of-range one
            // could leave a gap (e.g. `.inline(a, nil, c)`) and break the invariant
            // that occupied slots are contiguous from the front.
            precondition(subrange.lowerBound >= 0 && subrange.upperBound <= count,
                         "InlineArray3 replaceSubrange: range out of bounds")
            if count - subrange.count + newElements.count <= 3 {
                // Remove elements at subrange indices, highest index first so each
                // removal shifts the remaining tail down by one.
                if subrange.contains(2) {
                    c = nil
                }
                if subrange.contains(1) {
                    b = c
                    c = nil
                }
                if subrange.contains(0) {
                    a = b
                    b = c
                    c = nil
                }

                // Insert newElements starting at subrange.lowerBound, shifting the
                // existing tail up by one for each inserted element.
                for (offset, el) in newElements.enumerated() {
                    // assert that we have space to insert
                    assert(c == nil)
                    let newIndex = subrange.lowerBound + offset
                    switch newIndex {
                    case 2:
                        c = el
                    case 1:
                        c = b
                        b = el
                    case 0:
                        c = b
                        b = a
                        a = el
                    default:
                        fatalError("unreachable")
                    }
                }

                storage = .inline(a, b, c)
            } else {
                // Too many elements for the inline slots: spill the occupied slots,
                // in order, into an out-of-line array and let it do the replacement.
                var arr: ContiguousArray<Element> = if let a {
                    if let b {
                        if let c {
                            [a, b, c]
                        } else {
                            [a, b]
                        }
                    } else {
                        [a]
                    }
                } else {
                    []
                }
                arr.replaceSubrange(subrange, with: newElements)
                storage = .array(arr)
            }
        }
    }
}

private extension Collection {
    /// Advances `index` by `offsetBy` steps, returning nil if that would reach or
    /// pass `endIndex`.
    func safeIndex(_ index: Index, offsetBy: Int) -> Index? {
        var index = index
        var offsetBy = offsetBy
        while offsetBy > 0 {
            if index < endIndex {
                formIndex(after: &index)
                offsetBy -= 1
            } else {
                return nil
            }
        }
        return index >= endIndex ?
nil : index + } +} + +extension InlineArray3.Storage: Equatable where E: Equatable { +} + +extension InlineArray3: Equatable where E: Equatable { +} + +extension InlineArray3: CustomStringConvertible { + var description: String { + switch storage { + case .inline(nil, nil, nil): + return "[]" + case .inline(.some(let a), nil, nil): + return "[\(a)]" + case .inline(.some(let a), .some(let b), nil): + return "[\(a), \(b)]" + case .inline(.some(let a), .some(let b), .some(let c)): + return "[\(a), \(b), \(c)]" + case .inline(_, _, _): + fatalError("InlineArray3 invariant violated") + case .array(let arr): + return arr.description + } + } +} diff --git a/Sources/HTMLStreamer/Tokenizer.swift b/Sources/HTMLStreamer/Tokenizer.swift new file mode 100644 index 0000000..f39a878 --- /dev/null +++ b/Sources/HTMLStreamer/Tokenizer.swift @@ -0,0 +1,1692 @@ +// +// Tokenizer.swift +// HTMLStreamer +// +// Created by Shadowfacts on 11/22/23. +// + +import Foundation + +struct Tokenizer>: IteratorProtocol { + typealias Element = Token + + private var chars: Chars + private var reconsumeStack: InlineArray3 = [] + private var state = State.data + private var returnState: State? + private var temporaryBuffer: String? + private var characterReferenceCode: UInt32? + private var currentToken: Token? + + init(chars: Chars) { + self.chars = chars + } + + mutating func next() -> Token? 
{ + switch state { + case .flushingTemporaryBuffer(let returnState): + if temporaryBuffer == nil || temporaryBuffer!.isEmpty { + state = returnState + return next() + } else { + return .character(temporaryBuffer!.removeFirst()) + } + case .endOfFile: + return nil + case .emitTokens(var tokens, let nextState): + if tokens.isEmpty { + state = nextState + return next() + } else { + let tok = tokens.removeFirst() + state = .emitTokens(tokens, nextState) + return tok + } + + case .data: + return tokenizeData() + case .characterReference: + return tokenizeCharacterReference() + case .namedCharacterReference: + return tokenizeNamedCharaterReference() + case .numericCharacterReference: + return tokenizeNumericCharacterReference() + case .numericCharacterReferenceEnd: + return tokenizeNumericCharacterReferenceEnd() + case .hexadecimalCharacterReferenceStart: + return tokenizeHexadecimalCharacterReferenceStart() + case .hexadecimalCharacterReference: + return tokenizeHexadecimalCharacterReference() + case .decimalCharacterReferenceStart: + return tokenizeDecimalCharacterReferenceStart() + case .decimalCharacterReference: + return tokenizeDecimalCharacterReference() + case .ambiguousAmpersand: + return tokenizeAmbiguousAmpersand() + case .tagOpen: + return tokenizeTagOpen() + case .endTagOpen: + return tokenizeEndTagOpen() + case .tagName: + return tokenizeTagName() + case .selfClosingStartTag: + return tokenizeSelfClosingStartTag() + case .beforeAttributeName: + return tokenizeBeforeAttributeName() + case .attributeName: + return tokenizeAttributeName() + case .afterAttributeName: + return tokenizeAfterAttributeName() + case .beforeAttributeValue: + return tokenizeBeforeAttributeValue() + case .attributeValue(let quotes): + return tokenizeAttributeValue(quotes: quotes) + case .afterAttributeValueQuoted: + return tokenizeAfterAttributeValueQuoted() + case .bogusComment: + return tokenizeBogusComment() + case .markupDeclarationOpen: + return tokenizeMarkupDeclarationOpen() + 
case .commentStart: + return tokenizeCommentStart() + case .commentStartDash: + return tokenizeCommentStartDash() + case .comment: + return tokenizeComment() + case .commentLessThanSign: + return tokenizeCommentLessThanSign() + case .commentLessThanSignBang: + return tokenizeCommentLessThanSignBang() + case .commentLessThanSignBangDash: + return tokenizeCommentLessThanSignBangDash() + case .commentLessThanSignBangDashDash: + return tokenizeCommentLessThanSignBangDashDash() + case .commentEndDash: + return tokenizeCommentEndDash() + case .commentEnd: + return tokenizeCommentEnd() + case .commentEndBang: + return tokenizeCommentEndBang() + case .doctype: + return tokenizeDoctype() + case .beforeDoctypeName: + return tokenizeBeforeDoctypeName() + case .doctypeName: + return tokenizeDoctypeName() + case .afterDoctypeName: + return tokenizeAfterDoctypeName() + case .afterDoctypePublicKeyword: + return tokenizeAfterDoctypePublicKeyword() + case .beforeDoctypePublicIdentifier: + return tokenizeBeforeDoctypePublicIdentifier() + case .doctypePublicIdentifier(let quotes): + return tokenizeDoctypePublicIdentifier(quotes: quotes) + case .afterDoctypePublicIdentifier: + return tokenizeAfterDoctypePublicIdentifier() + case .betweenDoctypePublicAndSystemIdentifiers: + return tokenizeBetweenDoctypePublicAndSystemIdentifiers() + case .afterDoctypeSystemKeyword: + return tokenizeAfterDoctypeSystemKeyword() + case .beforeDoctypeSystemIdentifier: + return tokenizeBeforeDoctypeSystemIdentifier() + case .doctypeSystemIdentifier(let quotes): + return tokenizeDoctypeSystemIdentifier(quotes: quotes) + case .afterDoctypeSystemIdentifier: + return tokenizeAfterDoctypeSystemIdentifier() + case .bogusDoctype: + return tokenizeBogusDoctype() + } + } + + private mutating func reconsume(_ c: Character?) { + if let c { + reconsumeStack.append(c) + } + } + + private mutating func nextChar() -> Character? 
{ + if !reconsumeStack.isEmpty { + return reconsumeStack.removeLast() + } else { + return chars.next() + } + } + + private mutating func peekChar() -> Character? { + if let nextToReconsume = reconsumeStack.last { + return nextToReconsume + } else { + let c = chars.next() + if let c { + reconsume(c) + } + return c + } + } + + // TODO: extract this all out into a standalone type and test it separately + private mutating func peek(count: Int) -> String { + precondition(count >= 0) + var buf = "" + for _ in 0..= 0) + for _ in 0.. Token { + defer { currentToken = nil } + return currentToken! + } +} + +enum Token: Equatable { + case character(Character) + case comment(String) + case startTag(String, selfClosing: Bool, attributes: InlineArray3) + case endTag(String) + case doctype(String, forceQuirks: Bool, publicIdentifier: String?, systemIdentifier: String?) +} + +struct Attribute: Equatable { + var name: String + var value: String +} + +private enum State { + // Internal states used by the tokenizer + indirect case flushingTemporaryBuffer(State) + case endOfFile + indirect case emitTokens([Token], State) + + // States defined by the spec + case data + // RCDATA not currently supported +// case rcdata + // RAWTEXT not currently supported +// case rawtext + // script tag not currently supported +// case scriptData + // plaintext tag not currently supported +// case plaintext + case tagOpen + case endTagOpen + case tagName + // RCDATA not currently supported +// case rcdataLessThanSign +// case rcdataEndTagOpen +// case rcdataEndTagName + // RAWTEXT not currently supported +// case rawtextLessThanSign +// case rawtextEndTagOpen +// case rawtextEndTagName + // script not currently supported +// case scriptDataLessThanSign +// case scriptDataEndTagOpen +// case scriptDataEndTagName +// case scriptDataEscapeStart +// case scriptDataEscapeStartDash +// case scriptDataEscaped +// case scriptDataEscapedDash +// case scriptDataEscapedDashDash +// case 
scriptDataEscapedLessThanSign +// case scriptDataEscapedEndTagOpen +// case scriptDataEscapedEndTagName +// case scriptDataDoubleEscapeStart +// case scriptDataDoubleEscaped +// case scriptDataDoubleEscapedDash +// case scriptDataDoubleEscapedDashDash +// case scriptDataDoubleEscapedLessThanSign +// case scriptDataDoubleEscapeEnd + case beforeAttributeName + case attributeName + case afterAttributeName + case beforeAttributeValue + case attributeValue(AttributeValueQuotation) + case afterAttributeValueQuoted + case selfClosingStartTag + case bogusComment + case markupDeclarationOpen + case commentStart + case commentStartDash + case comment + case commentLessThanSign + case commentLessThanSignBang + case commentLessThanSignBangDash + case commentLessThanSignBangDashDash + case commentEndDash + case commentEnd + case commentEndBang + case doctype + case beforeDoctypeName + case doctypeName + case afterDoctypeName + case afterDoctypePublicKeyword + case beforeDoctypePublicIdentifier + case doctypePublicIdentifier(DoctypeIdentifierQuotation) + case afterDoctypePublicIdentifier + case betweenDoctypePublicAndSystemIdentifiers + case afterDoctypeSystemKeyword + case beforeDoctypeSystemIdentifier + case doctypeSystemIdentifier(DoctypeIdentifierQuotation) + case afterDoctypeSystemIdentifier + case bogusDoctype + // CDATA not currently supported +// case cdataSection +// case cdataSectionBracket +// case cdataSectionEndState + case characterReference + case namedCharacterReference + case ambiguousAmpersand + case numericCharacterReference + case hexadecimalCharacterReferenceStart + case decimalCharacterReferenceStart + case hexadecimalCharacterReference + case decimalCharacterReference + case numericCharacterReferenceEnd +} + +private enum AttributeValueQuotation { + case singleQuoted, doubleQuoted, unquoted +} + +private enum DoctypeIdentifierQuotation { + case singleQuoted, doubleQuoted +} + +private extension Tokenizer { + mutating func tokenizeData() -> Token? 
{ + switch nextChar() { + case "&": + returnState = .data + state = .characterReference + return tokenizeCharacterReference() + case "<": + state = .tagOpen + return tokenizeTagOpen() + case "\0": + return .character("\0") + case nil: + return nil // end of fil + case .some(let c): + return .character(c) + } + } + + mutating func tokenizeCharacterReference() -> Token? { + temporaryBuffer = "&" + guard let c = nextChar() else { + reconsume(nil) + state = .flushingTemporaryBuffer(returnState!) + return next() + } + switch c { + case "a"..."z", "A"..."Z", "0"..."9": + reconsume(c) + state = .namedCharacterReference + return tokenizeNamedCharaterReference() + case "#": + temporaryBuffer!.append("#") + state = .numericCharacterReference + return tokenizeNumericCharacterReference() + default: + reconsume(c) + state = returnState! + return next() + } + } + + mutating func tokenizeNamedCharaterReference() -> Token? { + // TODO: this could definitely be faster + // maybe with a prefix tree for named characters + var everHadMatch = false + var outOfChars = false + func hasMatch() -> Bool { + let buf = temporaryBuffer! + let key = buf[buf.index(after: buf.startIndex)...] + return namedCharactersDecodeMap.keys.contains(where: { $0.starts(with: key) }) + } + while hasMatch() { + everHadMatch = true + guard let char = nextChar() else { + outOfChars = true + break + } + temporaryBuffer!.append(char) + } + if everHadMatch { + if !outOfChars { + // the last character changed us from having a match to not + reconsume(temporaryBuffer!.removeLast()) + } + + if case .attributeValue(_) = returnState, + temporaryBuffer!.last != ";", + let peeked = peekChar(), + peeked == "=" || (peeked.isASCII && (peeked.isLetter || peeked.isNumber)) { + state = .flushingTemporaryBuffer(returnState!) 
+            } else {
+                let insertSemicolon = temporaryBuffer!.last != ";"
+                if insertSemicolon {
+                    // parse error: missing-semicolon-after-character-reference
+                    // Usually the parser behaves as if character reference is terminated by the U+003B (;) code point; however, there are some ambiguous cases in which the parser includes subsequent code points in the character reference.
+                    temporaryBuffer!.append(";")
+                }
+                if let reference = namedCharactersDecodeMap[String(temporaryBuffer![temporaryBuffer!.index(after: temporaryBuffer!.startIndex)...])] {
+                    temporaryBuffer = "\(reference)"
+                    flushCharacterReference()
+                } else {
+                    if insertSemicolon {
+                        temporaryBuffer!.removeLast()
+                    }
+                    state = .flushingTemporaryBuffer(.ambiguousAmpersand)
+                }
+            }
+        } else {
+            state = .flushingTemporaryBuffer(.ambiguousAmpersand)
+        }
+        return next()
+    }
+
+    /// Delivers a decoded character reference held in `temporaryBuffer`.
+    ///
+    /// Inside an attribute value the decoded text is appended to the last
+    /// attribute of the current start tag and tokenizing resumes in
+    /// `returnState`. Everywhere else the buffer is handed to the
+    /// `.flushingTemporaryBuffer` state.
+    mutating func flushCharacterReference() {
+        if case .attributeValue(_) = returnState {
+            if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                attributes[attributes.count - 1].value.append(temporaryBuffer!)
+                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                temporaryBuffer = nil
+                state = returnState!
+            } else if case .endTag(_) = currentToken {
+                // Attributes on end tags are discarded by the attribute states,
+                // so a reference inside one (`</a b="&amp;">`) is dropped too
+                // rather than crashing the tokenizer.
+                temporaryBuffer = nil
+                state = returnState!
+            } else {
+                fatalError("bad current tag")
+            }
+        } else {
+            state = .flushingTemporaryBuffer(returnState!)
+        }
+    }
+
+    /// Numeric character reference state: entered after "&#".
+    ///
+    /// Resets `characterReferenceCode`, then routes "x"/"X" to the hexadecimal
+    /// start state and everything else to decimal parsing. When no digit
+    /// follows, the consumed "&#" is flushed back out as text instead of
+    /// being decoded as code point 0.
+    mutating func tokenizeNumericCharacterReference() -> Token? {
+        characterReferenceCode = 0
+        let c = nextChar()
+        switch c {
+        case "x", "X":
+            // keep the author's casing so an unterminated "&#X" is flushed verbatim
+            temporaryBuffer!.append(c!)
+            state = .hexadecimalCharacterReferenceStart
+            return tokenizeHexadecimalCharacterReferenceStart()
+        default:
+            reconsume(c)
+            // Digit check done inline so the "&#" prefix is flushed when the
+            // reference has no digits at all.
+            guard let c, c.isASCII, c.isNumber else {
+                // parse error: absence-of-digits-in-numeric-character-reference
+                state = .flushingTemporaryBuffer(returnState!)
+                return next()
+            }
+            state = .decimalCharacterReference
+            return tokenizeDecimalCharacterReference()
+        }
+    }
+
+    mutating func tokenizeNumericCharacterReferenceEnd() -> Token? {
+        switch characterReferenceCode!
{ + case 0: + // parse error: null-character-reference + characterReferenceCode = 0xFFFD + case let c where c > 0x10FFFF: + // parse error: character-reference-outside-unicode-range + characterReferenceCode = 0xFFFD + case 0xD800...0xDBFF, 0xDC00...0xDFFF: // leading and trailing surrogate ranges + // parse error: surrogate-character-reference + characterReferenceCode = 0xFFFD + case let c where Unicode.Scalar(c) == nil: + // parse error: noncharacter-character-reference + // "The parser resolves such character references as-is." + // TODO: idfk what that means + characterReferenceCode = nil + state = returnState! + return next() + case 0x0D, 0...0x1F /* C0 control */, 0x7F...0x9F: + // parse error: control-character-reference + characterReferenceCode = switch characterReferenceCode! { + case 0x80: 0x20AC + case 0x82: 0x201A + case 0x83: 0x0192 + case 0x84: 0x201E + case 0x85: 0x2026 + case 0x86: 0x2020 + case 0x87: 0x2021 + case 0x88: 0x02C6 + case 0x89: 0x2030 + case 0x8A: 0x0160 + case 0x8B: 0x2039 + case 0x8C: 0x0152 + case 0x8E: 0x017D + case 0x91: 0x2018 + case 0x92: 0x2019 + case 0x93: 0x201C + case 0x94: 0x201D + case 0x95: 0x2022 + case 0x96: 0x2013 + case 0x97: 0x2014 + case 0x98: 0x02DC + case 0x99: 0x2122 + case 0x9A: 0x0161 + case 0x9B: 0x203A + case 0x9C: 0x0153 + case 0x9E: 0x017E + case 0x9F: 0x0178 + case let c: c + } + default: + break + } + temporaryBuffer = "" + if let c = Unicode.Scalar(characterReferenceCode!) { + temporaryBuffer!.append(Character(c)) + } + flushCharacterReference() + return next() + } + + mutating func tokenizeHexadecimalCharacterReferenceStart() -> Token? { + let c = nextChar() + switch c { + case .some("0"..."9"), .some("a"..."f"), .some("A"..."F"): + reconsume(c) + state = .hexadecimalCharacterReference + return tokenizeHexadecimalCharacterReference() + default: + // parse error: absence-of-digits-in-numeric-character-reference + reconsume(c) + state = .flushingTemporaryBuffer(returnState!) 
+            return next()
+        }
+    }
+
+    /// Hexadecimal character reference state: accumulates hex digits into
+    /// `characterReferenceCode` until ";" or a non-digit is seen.
+    ///
+    /// Only ASCII hex digits are accepted, and the accumulator saturates once it
+    /// exceeds U+10FFFF (the end state maps any such value to U+FFFD), so an
+    /// arbitrarily long digit run can no longer overflow `UInt32` and trap.
+    mutating func tokenizeHexadecimalCharacterReference() -> Token? {
+        let c = nextChar()
+        switch c {
+        case .some(let digit) where digit.isASCII && digit.isHexDigit:
+            let code = characterReferenceCode!
+            if code <= 0x10FFFF {
+                characterReferenceCode = (code * 16) + UInt32(digit.hexDigitValue!)
+            }
+            return tokenizeHexadecimalCharacterReference()
+        case ";":
+            state = .numericCharacterReferenceEnd
+            return tokenizeNumericCharacterReferenceEnd()
+        case let c:
+            // parse error: missing-semicolon-after-character-reference
+            reconsume(c)
+            state = .numericCharacterReferenceEnd
+            return tokenizeNumericCharacterReferenceEnd()
+        }
+    }
+
+    /// Decimal character reference start state: requires at least one ASCII
+    /// digit after "&#". Without one, the consumed characters are flushed back
+    /// out as text before resuming in `returnState`.
+    mutating func tokenizeDecimalCharacterReferenceStart() -> Token? {
+        let c = nextChar()
+        if let c,
+           c.isASCII && c.isNumber {
+            reconsume(c)
+            state = .decimalCharacterReference
+            return tokenizeDecimalCharacterReference()
+        } else {
+            // parse error: absence-of-digits-in-numeric-character-reference
+            reconsume(c)
+            // flush "&#" rather than dropping it (matches the hexadecimal start state)
+            state = .flushingTemporaryBuffer(returnState!)
+            return next()
+        }
+    }
+
+    /// Decimal character reference state: accumulates ASCII digits into
+    /// `characterReferenceCode` until ";" or a non-digit is seen, saturating
+    /// above U+10FFFF so long digit runs cannot overflow.
+    mutating func tokenizeDecimalCharacterReference() -> Token? {
+        let c = nextChar()
+        switch c {
+        case .some(let digit) where digit.isASCII && digit.isNumber:
+            let code = characterReferenceCode!
+            if code <= 0x10FFFF {
+                characterReferenceCode = (code * 10) + UInt32(digit.wholeNumberValue!)
+            }
+            return tokenizeDecimalCharacterReference()
+        case ";":
+            state = .numericCharacterReferenceEnd
+            return tokenizeNumericCharacterReferenceEnd()
+        default:
+            // if nil, parse error: missing-semicolon-after-character-reference
+            reconsume(c)
+            state = .numericCharacterReferenceEnd
+            return tokenizeNumericCharacterReferenceEnd()
+        }
+    }
+
+    mutating func tokenizeAmbiguousAmpersand() -> Token? {
+        let c = nextChar()
+        switch c {
+        case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"):
+            if case .attributeValue(_) = returnState {
+                // TODO: append the current input character to the current attribute's value
+                if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                    attributes[attributes.count - 1].value.append(c!)
+ currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + } else { + fatalError("bad current token") + } + return next() + } else { + return .character(c!) + } + default: + // if c == ";", parse error: unknown-named-character-reference + reconsume(c) + state = returnState! + return next() + } + } + + mutating func tokenizeTagOpen() -> Token? { + let c = nextChar() + switch c { + case "!": + state = .markupDeclarationOpen + return tokenizeMarkupDeclarationOpen() + case "/": + state = .endTagOpen + return tokenizeEndTagOpen() + case "?": + // parse error: unexpected-question-mark-instead-of-tag-name + currentToken = .comment("") + state = .bogusComment + return tokenizeBogusComment() + case nil: + // parser error: eof-before-tag-name + state = .endOfFile + return .character("<") + case .some("a"..."z"), .some("A"..."Z"): + currentToken = .startTag("", selfClosing: false, attributes: []) + reconsume(c) + state = .tagName + return tokenizeTagName() + case .some(_): + // parse error: invalid-first-character-of-tag-name + reconsume(c) + state = .data + return .character("<") + } + } + + mutating func tokenizeEndTagOpen() -> Token? { + let c = nextChar() + switch c { + case .some("a"..."z"), .some("A"..."Z"): + currentToken = .endTag("") + reconsume(c) + state = .tagName + return tokenizeTagName() + case ">": + // parse error: missing-end-tag-name + state = .data + return tokenizeData() + case nil: + // parse error: eof-before-tag-name + state = .emitTokens([.character("/")], .endOfFile) + return .character("<") + case .some(let c): + // parse error: invalid-first-character-of-tag-name + currentToken = .comment("") + reconsume(c) + state = .bogusComment + return tokenizeBogusComment() + } + } + + mutating func tokenizeTagName() -> Token? 
{
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            state = .beforeAttributeName
+            return tokenizeBeforeAttributeName()
+        case "/":
+            state = .selfClosingStartTag
+            return tokenizeSelfClosingStartTag()
+        case ">":
+            state = .data
+            return takeCurrentToken()
+        case nil:
+            // parse error: eof-in-tag
+            state = .endOfFile
+            return nil
+        case .some(var c):
+            if c == "\0" {
+                // parse error: unexpected-null-character
+                c = "\u{FFFD}"
+            } else if ("A"..."Z").contains(c) {
+                c = c.asciiLowercase
+            }
+            if case .startTag(var s, let selfClosing, let attributes) = currentToken {
+                s.append(c)
+                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                return tokenizeTagName()
+            } else if case .endTag(var s) = currentToken {
+                s.append(c)
+                currentToken = .endTag(s)
+                return tokenizeTagName()
+            } else {
+                fatalError("bad current token")
+            }
+        }
+    }
+
+    /// Self-closing start tag state: entered after a "/" inside a tag.
+    ///
+    /// A following ">" marks a start tag as self-closing and emits the current
+    /// tag. End tags have no self-closing flag, so `</a/>` is emitted as a
+    /// plain end tag instead of crashing.
+    mutating func tokenizeSelfClosingStartTag() -> Token? {
+        switch nextChar() {
+        case ">":
+            if case .startTag(let s, _, let attributes) = currentToken {
+                currentToken = .startTag(s, selfClosing: true, attributes: attributes)
+            } else if case .endTag(_) = currentToken {
+                // parse error: end-tag-with-trailing-solidus; the solidus is ignored
+            } else {
+                fatalError("bad current token")
+            }
+            state = .data
+            return takeCurrentToken()
+        case nil:
+            // parse error: eof-in-tag
+            state = .endOfFile
+            return nil
+        case .some(let c):
+            // parse error: unexpected-solidus-in-tag
+            reconsume(c)
+            state = .beforeAttributeName
+            return tokenizeBeforeAttributeName()
+        }
+    }
+
+    mutating func tokenizeBeforeAttributeName() -> Token?
{
+        // Before attribute name state: skips whitespace, hands "/", ">" and EOF
+        // to the after-attribute-name state, and otherwise starts a new attribute
+        // on start tags. End tags carry no attributes, so for them the name is
+        // scanned and thrown away.
+        let c = nextChar()
+        switch c {
+        case "\t", "\n", "\u{000C}", " ":
+            // ignore the character
+            return next()
+        case "/", ">", nil:
+            reconsume(c)
+            state = .afterAttributeName
+            return tokenizeAfterAttributeName()
+        case "=":
+            // parse error: unexpected-equals-sign-before-attribute-name
+            if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                attributes.append(Attribute(name: "=", value: ""))
+                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                state = .attributeName
+                return tokenizeAttributeName()
+            } else if case .endTag(_) = currentToken {
+                // ignore: same treatment as any other attribute on an end tag,
+                // instead of crashing on input such as `</a =b>`
+                state = .attributeName
+                return tokenizeAttributeName()
+            } else {
+                fatalError("bad current token")
+            }
+        default:
+            if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                attributes.append(Attribute(name: "", value: ""))
+                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                reconsume(c)
+                state = .attributeName
+                return tokenizeAttributeName()
+            } else if case .endTag(_) = currentToken {
+                // ignore
+                reconsume(c)
+                state = .attributeName
+                return tokenizeAttributeName()
+            } else {
+                fatalError("bad current token")
+            }
+        }
+    }
+
+    mutating func tokenizeAttributeName() -> Token?
{
+        let c = nextChar()
+        switch c {
+        case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
+            reconsume(c)
+            state = .afterAttributeName
+            return tokenizeAfterAttributeName()
+        case "=":
+            state = .beforeAttributeValue
+            return tokenizeBeforeAttributeValue()
+        case .some(var c):
+            if ("A"..."Z").contains(c) {
+                c = c.asciiLowercase
+            }
+            // if null, parse error: unexpected-null-character
+            if c == "\0" {
+                c = "\u{FFFD}"
+            }
+            // if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
+            if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                attributes[attributes.count - 1].name.append(c)
+                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                return tokenizeAttributeName()
+            } else if case .endTag(_) = currentToken {
+                return tokenizeAttributeName()
+            } else {
+                fatalError("bad curren token")
+            }
+        }
+    }
+
+    /// After attribute name state: entered once an attribute name has ended
+    /// without an "=".
+    ///
+    /// Skips whitespace, then either reads a value ("="), closes the tag
+    /// ("/" or ">"), or begins a *new* attribute with the next character.
+    mutating func tokenizeAfterAttributeName() -> Token? {
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            // ignore the character; stay in this state so the next name starts a
+            // fresh attribute instead of being appended to the previous one
+            return tokenizeAfterAttributeName()
+        case "/":
+            state = .selfClosingStartTag
+            return tokenizeSelfClosingStartTag()
+        case "=":
+            state = .beforeAttributeValue
+            return tokenizeBeforeAttributeValue()
+        case ">":
+            // Without this case ">" bounced between this state and the attribute
+            // name state forever (e.g. `<a foo>`).
+            state = .data
+            return takeCurrentToken()
+        case nil:
+            // parse error: eof-in-tag
+            state = .endOfFile
+            return nil
+        case .some(let c):
+            if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                attributes.append(Attribute(name: "", value: ""))
+                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                reconsume(c)
+                state = .attributeName
+                return tokenizeAttributeName()
+            } else if case .endTag(_) = currentToken {
+                reconsume(c)
+                state = .attributeName
+                return tokenizeAttributeName()
+            } else {
+                fatalError("bad current token")
+            }
+        }
+    }
+
+    mutating func tokenizeBeforeAttributeValue() -> Token?
{
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            // ignore the character
+            return tokenizeBeforeAttributeValue()
+        case "\"":
+            state = .attributeValue(.doubleQuoted)
+            return tokenizeAttributeValue(quotes: .doubleQuoted)
+        case "'":
+            state = .attributeValue(.singleQuoted)
+            return tokenizeAttributeValue(quotes: .singleQuoted)
+        case ">":
+            // parse error: missing-attribute-value
+            state = .data
+            return takeCurrentToken()
+        case let c:
+            reconsume(c)
+            state = .attributeValue(.unquoted)
+            return tokenizeAttributeValue(quotes: .unquoted)
+        }
+    }
+
+    /// Attribute value states (double-quoted, single-quoted and unquoted).
+    ///
+    /// Appends characters to the value of the last attribute of the current
+    /// start tag until the value is terminated. NUL becomes U+FFFD, "&" starts
+    /// a character reference, and values on end tags are scanned but discarded.
+    ///
+    /// - Parameter quotes: which of the three attribute value states to run.
+    mutating func tokenizeAttributeValue(quotes: AttributeValueQuotation) -> Token? {
+        if quotes == .unquoted {
+            switch nextChar() {
+            case "\t", "\n", "\u{000C}", " ":
+                state = .beforeAttributeName
+                return tokenizeBeforeAttributeName()
+            case "&":
+                returnState = .attributeValue(.unquoted)
+                state = .characterReference
+                return tokenizeCharacterReference()
+            case ">":
+                state = .data
+                return takeCurrentToken()
+            case nil:
+                // parse error: eof-in-tag
+                state = .endOfFile
+                return nil
+            case .some(var c):
+                // if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
+                if c == "\0" {
+                    // parse error: unexpected-null-character
+                    c = "\u{FFFD}"
+                }
+                if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+                    attributes[attributes.count - 1].value.append(c)
+                    currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+                    return tokenizeAttributeValue(quotes: quotes)
+                } else if case .endTag(_) = currentToken {
+                    // end tags keep no attributes; skip the character as the
+                    // quoted branch does, instead of crashing on `</a b=c>`
+                    return tokenizeAttributeValue(quotes: quotes)
+                } else {
+                    fatalError("bad current token")
+                }
+            }
+        } else {
+            let c = nextChar()
+            switch c {
+            case "\"" where quotes == .doubleQuoted:
+                state = .afterAttributeValueQuoted
+                return tokenizeAfterAttributeValueQuoted()
+            case "'" where quotes == .singleQuoted:
+                state = .afterAttributeValueQuoted
+                return tokenizeAfterAttributeValueQuoted()
+            case "&":
+                returnState = .attributeValue(quotes)
+                state = .characterReference
+                return tokenizeCharacterReference()
+            case nil:
+                // parse error: eof-in-tag
+                state = .endOfFile
+                return nil
+            case .some(var
c): + if c == "\0" { + // parse error: unexpected-null-character + c = "\u{FFFD}" + } + if case .startTag(let s, let selfClosing, var attributes) = currentToken { + attributes[attributes.count - 1].value.append(c) + currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + return tokenizeAttributeValue(quotes: quotes) + } else if case .endTag(_) = currentToken { + return tokenizeAttributeValue(quotes: quotes) + } else { + fatalError("bad current token") + } + } + } + } + + mutating func tokenizeAfterAttributeValueQuoted() -> Token? { + switch nextChar() { + case "\t", "\n", "\u{000C}", " ": + state = .beforeAttributeName + return tokenizeBeforeAttributeName() + case "/": + state = .selfClosingStartTag + return tokenizeSelfClosingStartTag() + case ">": + state = .data + return takeCurrentToken() + case nil: + // parse error: eof-in-tag + state = .endOfFile + return nil + case .some(let c): + // parse error: missing-whitespace-between-attributes + reconsume(c) + state = .beforeAttributeName + return tokenizeBeforeAttributeName() + } + } + + mutating func tokenizeBogusComment() -> Token? { + switch nextChar() { + case ">": + state = .data + return takeCurrentToken() + case nil: + state = .endOfFile + return takeCurrentToken() + case .some(var c): + if c == "\0" { + // parse error: unexpected-null-character + c = "\u{FFFD}" + } + if case .comment(var s) = currentToken { + s.append(c) + currentToken = .comment(s) + return tokenizeBogusComment() + } else { + fatalError("bad current token") + } + } + } + + mutating func tokenizeMarkupDeclarationOpen() -> Token? 
{
+        let peeked = peek(count: 7)
+        if peeked.starts(with: "--") {
+            consume(count: 2)
+            currentToken = .comment("")
+            state = .commentStart
+            return tokenizeCommentStart()
+        } else if peeked.lowercased() == "doctype" {
+            consume(count: 7)
+            state = .doctype
+            return tokenizeDoctype()
+        } else if peeked == "[CDATA[" {
+            // TODO: we don't do any of the tree construction stuff yet, so can't really handle this
+//            consume(count: 7)
+            currentToken = .comment("")
+            state = .bogusComment
+            return tokenizeBogusComment()
+        } else {
+            // parse error: incorrectly-opened-comment
+            currentToken = .comment("")
+            state = .bogusComment
+            return tokenizeBogusComment()
+        }
+    }
+
+    /// Comment start state: entered right after "<!--".
+    mutating func tokenizeCommentStart() -> Token? {
+        switch nextChar() {
+        case "-":
+            state = .commentStartDash
+            return tokenizeCommentStartDash()
+        case ">":
+            // parse error: abrupt-closing-of-empty-comment
+            state = .data
+            return takeCurrentToken()
+        case let c:
+            reconsume(c)
+            state = .comment
+            return tokenizeComment()
+        }
+    }
+
+    /// Comment start dash state: entered after "<!---".
+    ///
+    /// A second "-" moves towards the comment end, ">" closes the (empty)
+    /// comment, end of input emits the comment and finishes tokenizing, and
+    /// anything else puts the pending "-" into the comment data.
+    mutating func tokenizeCommentStartDash() -> Token? {
+        switch nextChar() {
+        case "-":
+            state = .commentEnd
+            return tokenizeCommentEnd()
+        case ">":
+            // parse error: abrupt-closing-of-empty-comment
+            state = .data
+            return takeCurrentToken()
+        case nil:
+            // parse error: eof-in-comment
+            // Move to end-of-file like every other comment state does, so the
+            // next call does not re-enter this state with no current token.
+            state = .endOfFile
+            return takeCurrentToken()
+        case .some(let c):
+            if case .comment(var s) = currentToken {
+                s.append("-")
+                currentToken = .comment(s)
+                reconsume(c)
+                state = .comment
+                return tokenizeComment()
+            } else {
+                fatalError("bad current token")
+            }
+        }
+    }
+
+    mutating func tokenizeComment() -> Token?
{ + switch nextChar() { + case "<": + if case .comment(var s) = currentToken { + s.append("<") + currentToken = .comment(s) + state = .commentLessThanSign + return tokenizeCommentLessThanSign() + } else { + fatalError("bad current token") + } + case "-": + state = .commentEndDash + return tokenizeCommentEndDash() + case nil: + // parse error: eof-in-comment + state = .endOfFile + return takeCurrentToken() + case .some(var c): + if c == "\0" { + // parse error: unexpected-null-character + c = "\u{FFFD}" + } + if case .comment(var s) = currentToken { + s.append(c) + currentToken = .comment(s) + return tokenizeComment() + } else { + fatalError("bad current token") + } + } + } + + mutating func tokenizeCommentLessThanSign() -> Token? { + switch nextChar() { + case "!": + if case .comment(var s) = currentToken { + s.append("!") + currentToken = .comment(s) + state = .commentLessThanSignBang + return tokenizeCommentLessThanSignBang() + } else { + fatalError("bad current token") + } + case "<": + if case .comment(var s) = currentToken { + s.append("<") + currentToken = .comment(s) + return tokenizeComment() + } else { + fatalError("bad current token") + } + case let c: + reconsume(c) + state = .comment + return tokenizeComment() + } + } + + mutating func tokenizeCommentLessThanSignBang() -> Token? { + switch nextChar() { + case "-": + state = .commentLessThanSignBangDash + return tokenizeCommentLessThanSignBangDash() + case let c: + reconsume(c) + state = .comment + return tokenizeComment() + } + } + + mutating func tokenizeCommentLessThanSignBangDash() -> Token? { + switch nextChar() { + case "-": + state = .commentLessThanSignBangDashDash + return tokenizeCommentLessThanSignBangDashDash() + case let c: + reconsume(c) + state = .commentEndDash + return tokenizeCommentEndDash() + } + } + + mutating func tokenizeCommentLessThanSignBangDashDash() -> Token? 
{ + let c = nextChar() + switch c { + case ">", nil: + reconsume(c) + state = .commentEnd + return tokenizeCommentEnd() + default: + // parse error: nested-comment + reconsume(c) + state = .commentEnd + return tokenizeCommentEnd() + } + } + + mutating func tokenizeCommentEndDash() -> Token? { + switch nextChar() { + case "-": + state = .commentEnd + return tokenizeCommentEnd() + case nil: + // parse error: eof-in-comment + state = .endOfFile + return takeCurrentToken() + case let c: + if case .comment(var s) = currentToken { + s.append("-") + currentToken = .comment(s) + } else { + fatalError("bad current token") + } + reconsume(c) + state = .comment + return next() + } + } + + mutating func tokenizeCommentEnd() -> Token? { + switch nextChar() { + case ">": + state = .data + return takeCurrentToken() + case "!": + state = .commentEndBang + return tokenizeCommentEndBang() + case "-": + if case .comment(var s) = currentToken { + s.append("-") + currentToken = .comment(s) + return tokenizeCommentEnd() + } else { + fatalError("bad current token") + } + case nil: + // parse error: eof-in-comment + state = .endOfFile + return takeCurrentToken() + case .some(let c): + if case .comment(var s) = currentToken { + s.append("--") + currentToken = .comment(s) + } else { + fatalError("bad current token") + } + reconsume(c) + state = .comment + return tokenizeComment() + } + } + + mutating func tokenizeCommentEndBang() -> Token? 
{ + switch nextChar() { + case "-": + if case .comment(var s) = currentToken { + s.append("--!") + currentToken = .comment(s) + state = .commentEndDash + return tokenizeCommentEndDash() + } else { + fatalError("bad current token") + } + case ">": + // parse error: incorrectly-closed-comment + state = .data + return takeCurrentToken() + case nil: + // parse error: eof-in-comment + state = .endOfFile + return takeCurrentToken() + case .some(let c): + if case .comment(var s) = currentToken { + s.append("--!") + currentToken = .comment(s) + reconsume(c) + state = .comment + return tokenizeComment() + } else { + fatalError("bad current token") + } + } + } + + mutating func tokenizeDoctype() -> Token? { + switch nextChar() { + case "\t", "\n", "\u{000C}", " ": + state = .beforeDoctypeName + return tokenizeBeforeDoctypeName() + case ">": + reconsume(">") + state = .beforeDoctypeName + return tokenizeBeforeDoctypeName() + case nil: + // parse error: eof-in-doctype + state = .endOfFile + return .doctype("", forceQuirks: true, publicIdentifier: nil, systemIdentifier: nil) + case .some(let c): + // parse error: missing-whitespace-before-doctype-name + reconsume(c) + state = .beforeDoctypeName + return tokenizeBeforeDoctypeName() + } + } + + mutating func tokenizeBeforeDoctypeName() -> Token? 
{
+        // Before DOCTYPE name state: skips whitespace, then starts a doctype
+        // token from the first name character (ASCII-lowercased, NUL replaced by
+        // U+FFFD). A missing name (">" or end of input) emits an empty doctype
+        // with force-quirks set.
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            // ignore the character
+            return tokenizeBeforeDoctypeName()
+        case .some(let c) where ("A"..."Z").contains(c):
+            currentToken = .doctype("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
+            state = .doctypeName
+            return tokenizeDoctypeName()
+        case "\0":
+            // parse error: unexpected-null-character
+            currentToken = .doctype("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
+            state = .doctypeName
+            return tokenizeDoctypeName()
+        case ">":
+            // parse error: missing-doctype-name
+            state = .data
+            return .doctype("", forceQuirks: true, publicIdentifier: nil, systemIdentifier: nil)
+        case nil:
+            // parse error: eof-in-doctype
+            // a doctype cut off by end of input always forces quirks mode
+            state = .endOfFile
+            return .doctype("", forceQuirks: true, publicIdentifier: nil, systemIdentifier: nil)
+        case .some(let c):
+            currentToken = .doctype("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
+            state = .doctypeName
+            return tokenizeDoctypeName()
+        }
+    }
+
+    mutating func tokenizeDoctypeName() -> Token?
{
+        // DOCTYPE name state: appends characters to the doctype name until
+        // whitespace, ">" or end of input.
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            state = .afterDoctypeName
+            return tokenizeAfterDoctypeName()
+        case ">":
+            state = .data
+            return takeCurrentToken()
+        case nil:
+            // parse error: eof-in-doctype
+            // Switch to end-of-file as the other doctype states do; otherwise the
+            // next call re-enters this state with `currentToken == nil` and traps.
+            state = .endOfFile
+            if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                currentToken = nil
+                return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
+            } else {
+                fatalError("bad current token")
+            }
+        case .some(var c):
+            if c == "\0" {
+                c = "\u{FFFD}"
+            } else if ("A"..."Z").contains(c) {
+                c = c.asciiLowercase
+            }
+            if case .doctype(var s, let forceQuirks, _, _) = currentToken {
+                s.append(c)
+                currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: nil, systemIdentifier: nil)
+                return tokenizeDoctypeName()
+            } else {
+                fatalError("bad current token")
+            }
+        }
+    }
+
+    mutating func tokenizeAfterDoctypeName() -> Token? {
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            // ignore the character
+            return tokenizeAfterDoctypeName()
+        case ">":
+            state = .data
+            return takeCurrentToken()
+        case nil:
+            // parse error: eof-in-doctype
+            state = .endOfFile
+            if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                currentToken = nil
+                return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
+            } else {
+                fatalError("bad current token")
+            }
+        case .some(let c):
+            reconsume(c)
+            let peeked = peek(count: 6).lowercased()
+            if peeked == "public" {
+                consume(count: 6)
+                state = .afterDoctypePublicKeyword
+                return tokenizeAfterDoctypePublicKeyword()
+            } else if peeked == "system" {
+                consume(count: 6)
+                state = .afterDoctypeSystemKeyword
+                return tokenizeAfterDoctypeSystemKeyword()
+            } else {
+                // parse error: invalid-character-sequence-after-doctype-name
+                if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                    currentToken = .doctype(s,
forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + state = .bogusDoctype + return tokenizeBogusDoctype() + } + } + } + + mutating func tokenizeAfterDoctypePublicKeyword() -> Token? { + switch nextChar() { + case "\t", "\n", "\u{000C}", " ": + state = .beforeDoctypePublicIdentifier + return tokenizeBeforeDoctypePublicIdentifier() + case .some(let c) where c == "\"" || c == "'": + // parse error: missing-whitespace-after-doctype-public-keyword + if case .doctype(let s, let forceQuirks, _, _) = currentToken { + currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil) + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypePublicIdentifier(quotes) + return tokenizeDoctypePublicIdentifier(quotes: quotes) + } else { + fatalError("bad current token") + } + case ">": + // parse error: missing-doctype-public-identifier + state = .data + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case nil: + // parse error: eof-in-doctype + state = .endOfFile + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case .some(let c): + // parse error: missing-quote-before-doctype-public-identifier + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + state = .bogusDoctype + 
reconsume(c) + return tokenizeBogusDoctype() + } + } + + mutating func tokenizeBeforeDoctypePublicIdentifier() -> Token? { + switch nextChar() { + case "\t", "\n", "\u{000C}", " ": + // ignore the character + return tokenizeBeforeDoctypePublicIdentifier() + case .some(let c) where c == "\"" || c == "'": + if case .doctype(let s, let forceQuirks, _, _) = currentToken { + currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil) + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypePublicIdentifier(quotes) + return tokenizeDoctypePublicIdentifier(quotes: quotes) + } else { + fatalError("bad current token") + } + case ">": + // parse error: missing-doctype-public-identifier + state = .data + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case nil: + // parse error: eof-in-doctype + state = .endOfFile + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case .some(let c): + // parse error: missing-quote-before-doctype-public-identifier + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + reconsume(c) + state = .bogusDoctype + return tokenizeBogusDoctype() + } + } + + mutating func tokenizeDoctypePublicIdentifier(quotes: DoctypeIdentifierQuotation) -> Token? 
{
+        // DOCTYPE public identifier states (double- and single-quoted): appends
+        // characters to the public identifier until the matching quote. ">" or
+        // end of input ends the doctype early with force-quirks set.
+        switch nextChar() {
+        case "\"" where quotes == .doubleQuoted:
+            state = .afterDoctypePublicIdentifier
+            return tokenizeAfterDoctypePublicIdentifier()
+        case "'" where quotes == .singleQuoted:
+            state = .afterDoctypePublicIdentifier
+            return tokenizeAfterDoctypePublicIdentifier()
+        case ">":
+            // parse error: abrupt-doctype-public-identifier
+            // The ">" closes the doctype and is consumed here; reconsuming it
+            // would emit a stray ">" character token after the doctype.
+            state = .data
+            if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                currentToken = nil
+                return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
+            } else {
+                fatalError("bad current token")
+            }
+        case nil:
+            // parse error: eof-in-doctype
+            state = .endOfFile
+            if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                currentToken = nil
+                return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
+            } else {
+                fatalError("bad current token")
+            }
+        case .some(var c):
+            if c == "\0" {
+                // parse error: unexpected-null-character
+                c = "\u{FFFD}"
+            }
+            if case .doctype(let s, let forceQuirks, var publicIdentifier, _) = currentToken {
+                publicIdentifier!.append(c)
+                currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: nil)
+                return tokenizeDoctypePublicIdentifier(quotes: quotes)
+            } else {
+                fatalError("bad current token")
+            }
+        }
+    }
+
+    mutating func tokenizeAfterDoctypePublicIdentifier() -> Token?
{ + switch nextChar() { + case "\t", "\n", "\u{000C}", " ": + state = .betweenDoctypePublicAndSystemIdentifiers + return tokenizeBetweenDoctypePublicAndSystemIdentifiers() + case ">": + state = .data + return takeCurrentToken() + case .some(let c) where c == "\"" || c == "'": + // parse error: missing-whitespace-between-doctype-public-and-system-identifiers + if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken { + currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "") + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) + } else { + fatalError("bad current token") + } + case nil: + // parse error: eof-in-doctype + state = .endOfFile + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case .some(let c): + // parse error: missing-quote-before-doctype-system-identifier + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + reconsume(c) + state = .bogusDoctype + return tokenizeBogusDoctype() + } + } + + mutating func tokenizeBetweenDoctypePublicAndSystemIdentifiers() -> Token? 
{
+        // Between DOCTYPE public and system identifiers state: skips whitespace,
+        // then expects either ">" or the opening quote of the system identifier.
+        // Anything else marks the doctype force-quirks and skips the rest of it.
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            // ignore the character
+            return tokenizeBetweenDoctypePublicAndSystemIdentifiers()
+        case ">":
+            state = .data
+            return takeCurrentToken()
+        case .some(let c) where c == "\"" || c == "'":
+            if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
+                currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
+                let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+                state = .doctypeSystemIdentifier(quotes)
+                return tokenizeDoctypeSystemIdentifier(quotes: quotes)
+            } else {
+                fatalError("bad current token")
+            }
+        case nil:
+            // parse error: eof-in-doctype
+            state = .endOfFile
+            if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                currentToken = nil
+                return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
+            } else {
+                fatalError("bad current token")
+            }
+        case .some(let c):
+            // parse error: missing-quote-before-doctype-system-identifier
+            if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
+                currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
+            } else {
+                fatalError("bad current token")
+            }
+            reconsume(c)
+            // The current token is a doctype, so continue in the bogus *doctype*
+            // state; the bogus comment state traps on a non-comment token.
+            state = .bogusDoctype
+            return tokenizeBogusDoctype()
+        }
+    }
+
+    mutating func tokenizeAfterDoctypeSystemKeyword() -> Token? {
+        switch nextChar() {
+        case "\t", "\n", "\u{000C}", " ":
+            state = .beforeDoctypeSystemIdentifier
+            return tokenizeBeforeDoctypeSystemIdentifier()
+        case .some(let c) where c == "\"" || c == "'":
+            if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
+                currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
+                let quotes = c == "\"" ?
DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) + } else { + fatalError("bad current token") + } + case ">": + // parse error: missing-doctype-system-identifier + state = .data + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case nil: + // parse error: eof-in-doctype: + state = .endOfFile + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case .some(let c): + // parse error: missing-quote-before-doctype-system-identifier + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + reconsume(c) + state = .bogusDoctype + return tokenizeBogusDoctype() + } + } + + mutating func tokenizeBeforeDoctypeSystemIdentifier() -> Token? { + switch nextChar() { + case "\t", "\n", "\u{000C}", " ": + // ignore the character + return tokenizeBeforeDoctypeSystemIdentifier() + case .some(let c) where c == "\"" || c == "'": + if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken { + currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: " ") + let quotes = c == "\"" ? 
DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) + } else { + fatalError("bad current token") + } + case ">": + // parse error: missing-doctype-system-identifier + state = .data + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case nil: + // parse error: eof-in-doctype: + state = .endOfFile + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = nil + return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + case .some(let c): + // parse error: missing-quote-before-doctype-system-identifier + if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { + currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) + } else { + fatalError("bad current token") + } + reconsume(c) + state = .bogusDoctype + return tokenizeBogusDoctype() + } + } + + mutating func tokenizeDoctypeSystemIdentifier(quotes: DoctypeIdentifierQuotation) -> Token? 
{
        switch nextChar() {
        case "\"" where quotes == .doubleQuoted,
             "'" where quotes == .singleQuoted:
            // The quote that opened the identifier closes it; the other quote kind is ordinary content.
            state = .afterDoctypeSystemIdentifier
            return tokenizeAfterDoctypeSystemIdentifier()
        case ">":
            // parse error: abrupt-doctype-system-identifier
            state = .data
            guard case .doctype(let doctypeName, _, let publicID, let systemID) = currentToken else {
                fatalError("bad current token")
            }
            currentToken = nil
            return .doctype(doctypeName, forceQuirks: true, publicIdentifier: publicID, systemIdentifier: systemID)
        case nil:
            // parse error: eof-in-doctype
            state = .endOfFile
            guard case .doctype(let doctypeName, _, let publicID, let systemID) = currentToken else {
                fatalError("bad current token")
            }
            currentToken = nil
            return .doctype(doctypeName, forceQuirks: true, publicIdentifier: publicID, systemIdentifier: systemID)
        case .some(let raw):
            // parse error (NUL only): unexpected-null-character, stored as U+FFFD
            let ch = raw == "\0" ? "\u{FFFD}" : raw
            guard case .doctype(let doctypeName, let quirks, let publicID, var systemID) = currentToken else {
                fatalError("bad current token")
            }
            // The states that enter here set the identifier to a non-nil string first.
            systemID!.append(ch)
            currentToken = .doctype(doctypeName, forceQuirks: quirks, publicIdentifier: publicID, systemIdentifier: systemID)
            return tokenizeDoctypeSystemIdentifier(quotes: quotes)
        }
    }

    /// Handles input after the closing quote of the DOCTYPE system identifier.
    mutating func tokenizeAfterDoctypeSystemIdentifier() -> Token?
{
        guard let c = nextChar() else {
            // parse error: eof-in-doctype
            state = .endOfFile
            guard case .doctype(let doctypeName, _, let publicID, let systemID) = currentToken else {
                fatalError("bad current token")
            }
            currentToken = nil
            return .doctype(doctypeName, forceQuirks: true, publicIdentifier: publicID, systemIdentifier: systemID)
        }

        switch c {
        case "\t", "\n", "\u{000C}", " ":
            // ignore the character
            return tokenizeAfterDoctypeSystemIdentifier()
        case ">":
            state = .data
            return takeCurrentToken()
        default:
            // parse error: unexpected-character-after-doctype-system-identifier
            // Note: This does not set the current DOCTYPE token's force-quirks flag to on.
            reconsume(c)
            state = .bogusDoctype
            return tokenizeBogusDoctype()
        }
    }

    /// Discards the remainder of a malformed DOCTYPE.
    ///
    /// Every character up to the next `>` is ignored (a NUL is additionally an
    /// unexpected-null-character parse error). The pending token is then handed back via
    /// `takeCurrentToken()`, at `>` or at end of input.
    mutating func tokenizeBogusDoctype() -> Token? {
        while let c = nextChar() {
            if c == ">" {
                state = .data
                return takeCurrentToken()
            }
            // ignore the character
        }
        state = .endOfFile
        return takeCurrentToken()
    }
}

private extension Character {
    /// The lowercase counterpart of an ASCII uppercase letter.
    ///
    /// Only meaningful for "A" through "Z"; debug builds assert this, and a non-ASCII
    /// character traps on the forced `asciiValue` unwrap.
    var asciiLowercase: Character {
        assert(self >= "A" && self <= "Z")
        // ASCII lowercase letters sit 0x20 above their uppercase forms.
        let loweredByte = asciiValue! + 0x20
        return Character(Unicode.Scalar(loweredByte))
    }
}
diff --git a/Tests/HTMLStreamerTests/InlineArray3Tests.swift b/Tests/HTMLStreamerTests/InlineArray3Tests.swift
new file mode 100644
index 0000000..9cbb470
--- /dev/null
+++ b/Tests/HTMLStreamerTests/InlineArray3Tests.swift
@@ -0,0 +1,38 @@
+//
+// InlineArray3Tests.swift
+//
+//
+// Created by Shadowfacts on 11/19/23.
+// + +import XCTest +@testable import HTMLStreamer + +final class InlineArray3Tests: XCTestCase { + + func testReplaceSubrange() { + // same size + var a: InlineArray3 = [0, 1, 2] + a.replaceSubrange(0..<2, with: [3, 4]) + XCTAssertEqual(a, [3, 4, 2]) + + // grow + a = [0, 1] + a.replaceSubrange(1..<2, with: [2, 3]) + XCTAssertEqual(a, [0, 2, 3]) + + // shrink + a = [0, 1, 2] + a.replaceSubrange(0..<2, with: []) + XCTAssertEqual(a, [2]) + a.removeFirst() + XCTAssertEqual(a, []) + } + + func testRemoveLast() { + var a: InlineArray3 = [0, 1, 2] + a.removeLast(2) + XCTAssertEqual(a, [0]) + } + +} diff --git a/Tests/HTMLStreamerTests/TokenizerTests.swift b/Tests/HTMLStreamerTests/TokenizerTests.swift new file mode 100644 index 0000000..dd6b6dd --- /dev/null +++ b/Tests/HTMLStreamerTests/TokenizerTests.swift @@ -0,0 +1,79 @@ +// +// TokenizerTests.swift +// +// +// Created by Shadowfacts on 11/22/23. +// + +import XCTest +@testable import HTMLStreamer + +final class TokenizerTests: XCTestCase { + + private func tokenize(_ s: String) -> [Token] { + let iterator = Tokenizer(chars: s.makeIterator()) +// let iterator = PrintIterator(inner: Tokenizer(chars: s.makeIterator())) + return Array(AnySequence({ iterator })) + } + + func testNamedCharacterReferences() { + XCTAssertEqual(tokenize("&"), [.character("&")]) + // missing-semicolon-after-character-reference: + XCTAssertEqual(tokenize("¬in"), [.character("¬"), .character("i"), .character("n")]) + XCTAssertEqual(tokenize("¬in"), [.character("∉")]) + // unknown-named-character-reference: + XCTAssertEqual(tokenize("&asdf"), "&asdf".map { .character($0) }) + } + + func testNumericCharacterReference() { + XCTAssertEqual(tokenize("!"), [.character("!")]) + XCTAssertEqual(tokenize("!"), [.character("!")]) + } + + func testStartTag() { + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: false, attributes: [])]) + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [])]) + 
XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [])]) + + // double-quoted attributes + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])]) + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "&")])]) + + // single-quoted attributes + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])]) + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])]) + + // unquoted attributes + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])]) + XCTAssertEqual(tokenize(""), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])]) + } + + func testEndTag() { + XCTAssertEqual(tokenize(""), [.endTag("asdf")]) + XCTAssertEqual(tokenize(""), [.endTag("asdf")]) + } + + func testComment() { + XCTAssertEqual(tokenize(""), [.comment(" hello ")]) + XCTAssertEqual(tokenize(""), [.comment("- hello --")]) + XCTAssertEqual(tokenize("