#include <algorithm>
#include <string>
#include <unordered_map>

using std::string;
using std::unordered_map;

// Identifies an HTML element by tag name.
//
// Enumerator order is significant: every enumerator declared before
// END_OF_VOID_TAGS is treated as a void tag by Tag::is_void(). CUSTOM is used
// by Tag::for_name() for any name that is not in TAG_TYPES_BY_TAG_NAME.
enum TagType : char {
  AREA,
  BASE,
  BASEFONT,
  BGSOUND,
  BR,
  COL,
  COMMAND,
  EMBED,
  FRAME,
  HR,
  IMAGE,
  IMG,
  INPUT,
  ISINDEX,
  KEYGEN,
  LINK,
  MENUITEM,
  META,
  NEXTID,
  PARAM,
  SOURCE,
  TRACK,
  WBR,
  END_OF_VOID_TAGS,

  A,
  ABBR,
  ADDRESS,
  ARTICLE,
  ASIDE,
  AUDIO,
  B,
  BDI,
  BDO,
  BLOCKQUOTE,
  BODY,
  BUTTON,
  CANVAS,
  CAPTION,
  CITE,
  CODE,
  COLGROUP,
  DATA,
  DATALIST,
  DD,
  DEL,
  DETAILS,
  DFN,
  DIALOG,
  DIV,
  DL,
  DT,
  EM,
  FIELDSET,
  FIGCAPTION,
  FIGURE,
  FOOTER,
  FORM,
  H1,
  H2,
  H3,
  H4,
  H5,
  H6,
  HEAD,
  HEADER,
  HGROUP,
  HTML,
  I,
  IFRAME,
  INS,
  KBD,
  LABEL,
  LEGEND,
  LI,
  MAIN,
  MAP,
  MARK,
  MATH,
  MENU,
  METER,
  NAV,
  NOSCRIPT,
  OBJECT,
  OL,
  OPTGROUP,
  OPTION,
  OUTPUT,
  P,
  PICTURE,
  PRE,
  PROGRESS,
  Q,
  RB,
  RP,
  RT,
  RTC,
  RUBY,
  S,
  SAMP,
  SCRIPT,
  SECTION,
  SELECT,
  SLOT,
  SMALL,
  SPAN,
  STRONG,
  STYLE,
  SUB,
  SUMMARY,
  SUP,
  SVG,
  TABLE,
  TBODY,
  TD,
  TEMPLATE,
  TEXTAREA,
  TFOOT,
  TH,
  THEAD,
  TIME,
  TITLE,
  TR,
  U,
  UL,
  VAR,
  VIDEO,

  CUSTOM,
};

// Maps a tag name to its TagType. Keys are spelled in upper case and the
// lookup is an exact string match; END_OF_VOID_TAGS and CUSTOM have no entry.
static const unordered_map<string, TagType> TAG_TYPES_BY_TAG_NAME = {
  {"AREA", AREA},
  {"BASE", BASE},
  {"BASEFONT", BASEFONT},
  {"BGSOUND", BGSOUND},
  {"BR", BR},
  {"COL", COL},
  {"COMMAND", COMMAND},
  {"EMBED", EMBED},
  {"FRAME", FRAME},
  {"HR", HR},
  {"IMAGE", IMAGE},
  {"IMG", IMG},
  {"INPUT", INPUT},
  {"ISINDEX", ISINDEX},
  {"KEYGEN", KEYGEN},
  {"LINK", LINK},
  {"MENUITEM", MENUITEM},
  {"META", META},
  {"NEXTID", NEXTID},
  {"PARAM", PARAM},
  {"SOURCE", SOURCE},
  {"TRACK", TRACK},
  {"WBR", WBR},
  {"A", A},
  {"ABBR", ABBR},
  {"ADDRESS", ADDRESS},
  {"ARTICLE", ARTICLE},
  {"ASIDE", ASIDE},
  {"AUDIO", AUDIO},
  {"B", B},
  {"BDI", BDI},
  {"BDO", BDO},
  {"BLOCKQUOTE", BLOCKQUOTE},
  {"BODY", BODY},
  {"BUTTON", BUTTON},
  {"CANVAS", CANVAS},
  {"CAPTION", CAPTION},
  {"CITE", CITE},
  {"CODE", CODE},
  {"COLGROUP", COLGROUP},
  {"DATA", DATA},
  {"DATALIST", DATALIST},
  {"DD", DD},
  {"DEL", DEL},
  {"DETAILS", DETAILS},
  {"DFN", DFN},
  {"DIALOG", DIALOG},
  {"DIV", DIV},
  {"DL", DL},
  {"DT", DT},
  {"EM", EM},
  {"FIELDSET", FIELDSET},
  {"FIGCAPTION", FIGCAPTION},
  {"FIGURE", FIGURE},
  {"FOOTER", FOOTER},
  {"FORM", FORM},
  {"H1", H1},
  {"H2", H2},
  {"H3", H3},
  {"H4", H4},
  {"H5", H5},
  {"H6", H6},
  {"HEAD", HEAD},
  {"HEADER", HEADER},
  {"HGROUP", HGROUP},
  {"HTML", HTML},
  {"I", I},
  {"IFRAME", IFRAME},
  {"INS", INS},
  {"KBD", KBD},
  {"LABEL", LABEL},
  {"LEGEND", LEGEND},
  {"LI", LI},
  {"MAIN", MAIN},
  {"MAP", MAP},
  {"MARK", MARK},
  {"MATH", MATH},
  {"MENU", MENU},
  {"METER", METER},
  {"NAV", NAV},
  {"NOSCRIPT", NOSCRIPT},
  {"OBJECT", OBJECT},
  {"OL", OL},
  {"OPTGROUP", OPTGROUP},
  {"OPTION", OPTION},
  {"OUTPUT", OUTPUT},
  {"P", P},
  {"PICTURE", PICTURE},
  {"PRE", PRE},
  {"PROGRESS", PROGRESS},
  {"Q", Q},
  {"RB", RB},
  {"RP", RP},
  {"RT", RT},
  {"RTC", RTC},
  {"RUBY", RUBY},
  {"S", S},
  {"SAMP", SAMP},
  {"SCRIPT", SCRIPT},
  {"SECTION", SECTION},
  {"SELECT", SELECT},
  {"SLOT", SLOT},
  {"SMALL", SMALL},
  {"SPAN", SPAN},
  {"STRONG", STRONG},
  {"STYLE", STYLE},
  {"SUB", SUB},
  {"SUMMARY", SUMMARY},
  {"SUP", SUP},
  {"SVG", SVG},
  {"TABLE", TABLE},
  {"TBODY", TBODY},
  {"TD", TD},
  {"TEMPLATE", TEMPLATE},
  {"TEXTAREA", TEXTAREA},
  {"TFOOT", TFOOT},
  {"TH", TH},
  {"THEAD", THEAD},
  {"TIME", TIME},
  {"TITLE", TITLE},
  {"TR", TR},
  {"U", U},
  {"UL", UL},
  {"VAR", VAR},
  {"VIDEO", VIDEO},
};

// Child tag types for which Tag::can_contain() returns false when the parent
// is a P.
//
// NOTE(review): the HTML Standard's list of elements that implicitly end a
// <p> also names HGROUP, MENU, TABLE and UL, which are absent here - confirm
// whether that omission is intentional before relying on it.
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
  ADDRESS,
  ARTICLE,
  ASIDE,
  BLOCKQUOTE,
  DETAILS,
  DIV,
  DL,
  FIELDSET,
  FIGCAPTION,
  FIGURE,
  FOOTER,
  FORM,
  H1,
  H2,
  H3,
  H4,
  H5,
  H6,
  HEADER,
  HR,
  MAIN,
  NAV,
  OL,
  P,
  PRE,
  SECTION,
};

// One-past-the-end pointer for TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS, so the
// array can be handed to iterator-pair algorithms such as std::find.
static const TagType *TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END = (
  TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS +
  sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) / sizeof(TagType)
);

// A single tag: either one of the known TagType values, or CUSTOM together
// with the name that was looked up.
struct Tag {
  TagType type;
  // Holds the looked-up name when `type` is CUSTOM; for_name() leaves it
  // empty for every known tag.
  string custom_tag_name;

  // Two tags are equal when their types match; CUSTOM tags must additionally
  // carry the same name. custom_tag_name is ignored for non-CUSTOM types.
  bool operator==(const Tag &other) const {
    if (type != other.type) return false;
    return type != TagType::CUSTOM || custom_tag_name == other.custom_tag_name;
  }

  // True when `type` is one of the enumerators declared before
  // END_OF_VOID_TAGS.
  bool is_void() const {
    return type < END_OF_VOID_TAGS;
  }

  // True for SCRIPT and STYLE.
  bool is_raw() const {
    return type == SCRIPT || type == STYLE;
  }

  // Returns whether a tag of this type accepts `tag` as a child, according to
  // the per-parent rules below. Parents without a dedicated rule accept
  // anything.
  //
  // Const-qualified (like is_void/is_raw) because it only reads `type`, so it
  // can be called through a const Tag or const reference.
  bool can_contain(const Tag &tag) const {
    const TagType child = tag.type;

    switch (type) {
      case LI:
        return child != LI;

      case DT:
      case DD:
        return child != DT && child != DD;

      case P:
        // Allowed unless the child appears in the not-allowed table.
        return std::find(
          TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS,
          TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END,
          child
        ) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END;

      case COLGROUP:
        // The only rule that is an allow-list: COL and nothing else.
        return child == COL;

      case RB:
      case RT:
      case RP:
        return child != RB && child != RT && child != RP;

      case OPTGROUP:
        return child != OPTGROUP;

      case TR:
        return child != TR;

      case TD:
      case TH:
        return child != TD && child != TH && child != TR;

      default:
        return true;
    }
  }

  // Builds the Tag for `name`. Names found in TAG_TYPES_BY_TAG_NAME (exact
  // match against its upper-case keys) yield that type with an empty
  // custom_tag_name; any other name yields a CUSTOM tag that stores `name`.
  static Tag for_name(const string &name) {
    const auto it = TAG_TYPES_BY_TAG_NAME.find(name);
    if (it == TAG_TYPES_BY_TAG_NAME.end()) {
      return Tag { CUSTOM, name };
    }
    return Tag { it->second, "" };
  }
};