2018-06-11 22:12:01 +00:00
|
|
|
#include <algorithm>
#include <string>
#include <unordered_map>

using std::string;
using std::unordered_map;
|
|
|
|
|
2018-06-15 22:34:03 +00:00
|
|
|
// Identifies an HTML element by its tag name.
//
// The declaration order is significant: every enumerator listed before
// END_OF_VOID_TAGS is treated as a void tag, because Tag::is_void() is
// implemented as `type < END_OF_VOID_TAGS`. New void tags must therefore be
// added above that marker and all other tags below it.
//
// CUSTOM is the value Tag::for_name() assigns to any name that is not present
// in TAG_TYPES_BY_TAG_NAME; the actual name is then kept in
// Tag::custom_tag_name.
enum TagType {
  // --- Void tags (must stay before END_OF_VOID_TAGS) ---
  AREA,
  BASE,
  BASEFONT,
  BGSOUND,
  BR,
  COL,
  COMMAND,
  EMBED,
  FRAME,
  HR,
  IMAGE,
  IMG,
  INPUT,
  ISINDEX,
  KEYGEN,
  LINK,
  MENUITEM,
  META,
  NEXTID,
  PARAM,
  SOURCE,
  TRACK,
  WBR,
  // Marker only, not a real tag: upper bound of the void-tag range.
  END_OF_VOID_TAGS,

  // --- Non-void tags ---
  A,
  ABBR,
  ADDRESS,
  ARTICLE,
  ASIDE,
  AUDIO,
  B,
  BDI,
  BDO,
  BLOCKQUOTE,
  BODY,
  BUTTON,
  CANVAS,
  CAPTION,
  CITE,
  CODE,
  COLGROUP,
  DATA,
  DATALIST,
  DD,
  DEL,
  DETAILS,
  DFN,
  DIALOG,
  DIV,
  DL,
  DT,
  EM,
  FIELDSET,
  FIGCAPTION,
  FIGURE,
  FOOTER,
  FORM,
  H1,
  H2,
  H3,
  H4,
  H5,
  H6,
  HEAD,
  HEADER,
  HGROUP,
  HTML,
  I,
  IFRAME,
  INS,
  KBD,
  LABEL,
  LEGEND,
  LI,
  MAIN,
  MAP,
  MARK,
  MATH,
  MENU,
  METER,
  NAV,
  NOSCRIPT,
  OBJECT,
  OL,
  OPTGROUP,
  OPTION,
  OUTPUT,
  P,
  PICTURE,
  PRE,
  PROGRESS,
  Q,
  RB,
  RP,
  RT,
  RTC,
  RUBY,
  S,
  SAMP,
  SCRIPT,
  SECTION,
  SELECT,
  SLOT,
  SMALL,
  SPAN,
  STRONG,
  STYLE,
  SUB,
  SUMMARY,
  SUP,
  SVG,
  TABLE,
  TBODY,
  TD,
  TEMPLATE,
  TEXTAREA,
  TFOOT,
  TH,
  THEAD,
  TIME,
  TITLE,
  TR,
  U,
  UL,
  VAR,
  VIDEO,

  // Any tag name that has no dedicated enumerator above.
  CUSTOM,
};
|
|
|
|
|
|
|
|
// Lookup table from tag name to TagType, used by Tag::for_name().
//
// Every key is spelled in upper case and the lookup is an exact-match
// find(), so a name in any other case will not match and will be treated as
// CUSTOM. NOTE(review): presumably callers upper-case the name before the
// lookup - confirm at the call site.
//
// END_OF_VOID_TAGS and CUSTOM deliberately have no entry: they are not tag
// names.
static const unordered_map<string, TagType> TAG_TYPES_BY_TAG_NAME = {
  // Void tags
  {"AREA", AREA},
  {"BASE", BASE},
  {"BASEFONT", BASEFONT},
  {"BGSOUND", BGSOUND},
  {"BR", BR},
  {"COL", COL},
  {"COMMAND", COMMAND},
  {"EMBED", EMBED},
  {"FRAME", FRAME},
  {"HR", HR},
  {"IMAGE", IMAGE},
  {"IMG", IMG},
  {"INPUT", INPUT},
  {"ISINDEX", ISINDEX},
  {"KEYGEN", KEYGEN},
  {"LINK", LINK},
  {"MENUITEM", MENUITEM},
  {"META", META},
  {"NEXTID", NEXTID},
  {"PARAM", PARAM},
  {"SOURCE", SOURCE},
  {"TRACK", TRACK},
  {"WBR", WBR},

  // Non-void tags
  {"A", A},
  {"ABBR", ABBR},
  {"ADDRESS", ADDRESS},
  {"ARTICLE", ARTICLE},
  {"ASIDE", ASIDE},
  {"AUDIO", AUDIO},
  {"B", B},
  {"BDI", BDI},
  {"BDO", BDO},
  {"BLOCKQUOTE", BLOCKQUOTE},
  {"BODY", BODY},
  {"BUTTON", BUTTON},
  {"CANVAS", CANVAS},
  {"CAPTION", CAPTION},
  {"CITE", CITE},
  {"CODE", CODE},
  {"COLGROUP", COLGROUP},
  {"DATA", DATA},
  {"DATALIST", DATALIST},
  {"DD", DD},
  {"DEL", DEL},
  {"DETAILS", DETAILS},
  {"DFN", DFN},
  {"DIALOG", DIALOG},
  {"DIV", DIV},
  {"DL", DL},
  {"DT", DT},
  {"EM", EM},
  {"FIELDSET", FIELDSET},
  {"FIGCAPTION", FIGCAPTION},
  {"FIGURE", FIGURE},
  {"FOOTER", FOOTER},
  {"FORM", FORM},
  {"H1", H1},
  {"H2", H2},
  {"H3", H3},
  {"H4", H4},
  {"H5", H5},
  {"H6", H6},
  {"HEAD", HEAD},
  {"HEADER", HEADER},
  {"HGROUP", HGROUP},
  {"HTML", HTML},
  {"I", I},
  {"IFRAME", IFRAME},
  {"INS", INS},
  {"KBD", KBD},
  {"LABEL", LABEL},
  {"LEGEND", LEGEND},
  {"LI", LI},
  {"MAIN", MAIN},
  {"MAP", MAP},
  {"MARK", MARK},
  {"MATH", MATH},
  {"MENU", MENU},
  {"METER", METER},
  {"NAV", NAV},
  {"NOSCRIPT", NOSCRIPT},
  {"OBJECT", OBJECT},
  {"OL", OL},
  {"OPTGROUP", OPTGROUP},
  {"OPTION", OPTION},
  {"OUTPUT", OUTPUT},
  {"P", P},
  {"PICTURE", PICTURE},
  {"PRE", PRE},
  {"PROGRESS", PROGRESS},
  {"Q", Q},
  {"RB", RB},
  {"RP", RP},
  {"RT", RT},
  {"RTC", RTC},
  {"RUBY", RUBY},
  {"S", S},
  {"SAMP", SAMP},
  {"SCRIPT", SCRIPT},
  {"SECTION", SECTION},
  {"SELECT", SELECT},
  {"SLOT", SLOT},
  {"SMALL", SMALL},
  {"SPAN", SPAN},
  {"STRONG", STRONG},
  {"STYLE", STYLE},
  {"SUB", SUB},
  {"SUMMARY", SUMMARY},
  {"SUP", SUP},
  {"SVG", SVG},
  {"TABLE", TABLE},
  {"TBODY", TBODY},
  {"TD", TD},
  {"TEMPLATE", TEMPLATE},
  {"TEXTAREA", TEXTAREA},
  {"TFOOT", TFOOT},
  {"TH", TH},
  {"THEAD", THEAD},
  {"TIME", TIME},
  {"TITLE", TITLE},
  {"TR", TR},
  {"U", U},
  {"UL", UL},
  {"VAR", VAR},
  {"VIDEO", VIDEO},
};
|
|
|
|
|
2018-06-12 20:49:54 +00:00
|
|
|
// Tag types that a P element may not contain: Tag::can_contain() returns
// false for a P parent whenever the child's type appears in this list.
//
// The entries follow the HTML Standard's list of elements whose start tag
// lets a preceding <p> end tag be omitted. HGROUP, MENU, TABLE and UL are
// part of that list and are included here alongside the other block-level
// tags. (The standard also names `search`, which has no TagType enumerator
// in this file.)
//
// The array is searched linearly between its first element and
// TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END, whose value is derived from
// sizeof, so entries can be added without touching anything else.
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
  ADDRESS,
  ARTICLE,
  ASIDE,
  BLOCKQUOTE,
  DETAILS,
  DIV,
  DL,
  FIELDSET,
  FIGCAPTION,
  FIGURE,
  FOOTER,
  FORM,
  H1,
  H2,
  H3,
  H4,
  H5,
  H6,
  HEADER,
  HGROUP,
  HR,
  MAIN,
  MENU,
  NAV,
  OL,
  P,
  PRE,
  SECTION,
  TABLE,
  UL,
};
|
|
|
|
|
2018-06-12 20:49:54 +00:00
|
|
|
// One-past-the-end pointer for TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS, used as
// the end iterator when the array is searched with std::find.
//
// The element count is computed from sizeof so it tracks the array
// automatically. The pointer itself is const so it cannot be re-seated.
static const TagType *const TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END = (
  TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS +
  sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) /
  sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[0])
);
|
|
|
|
|
2018-06-11 22:12:01 +00:00
|
|
|
struct Tag {
|
|
|
|
TagType type;
|
|
|
|
string custom_tag_name;
|
|
|
|
|
2018-06-15 22:33:32 +00:00
|
|
|
Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {}
|
|
|
|
|
2018-06-11 22:12:01 +00:00
|
|
|
bool operator==(const Tag &other) const {
|
|
|
|
if (type != other.type) return false;
|
2018-06-15 22:34:03 +00:00
|
|
|
if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false;
|
2018-06-11 22:12:01 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool is_void() const {
|
|
|
|
return type < END_OF_VOID_TAGS;
|
|
|
|
}
|
|
|
|
|
2018-06-11 23:56:33 +00:00
|
|
|
inline bool is_raw() const {
|
|
|
|
return type == SCRIPT || type == STYLE;
|
|
|
|
}
|
|
|
|
|
2018-06-12 17:51:03 +00:00
|
|
|
inline bool can_contain(const Tag &tag) {
|
|
|
|
TagType child = tag.type;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case LI: return child != LI;
|
|
|
|
|
|
|
|
case DT:
|
|
|
|
case DD:
|
|
|
|
return child != DT && child != DD;
|
|
|
|
|
|
|
|
case P:
|
2018-06-12 20:49:54 +00:00
|
|
|
return std::find(
|
|
|
|
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS,
|
|
|
|
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END,
|
|
|
|
tag.type
|
|
|
|
) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END;
|
2018-06-12 17:51:03 +00:00
|
|
|
|
|
|
|
case COLGROUP:
|
|
|
|
return child == COL;
|
|
|
|
|
|
|
|
case RB:
|
|
|
|
case RT:
|
|
|
|
case RP:
|
|
|
|
return child != RB && child != RT && child != RP;
|
|
|
|
|
|
|
|
case OPTGROUP:
|
|
|
|
return child != OPTGROUP;
|
|
|
|
|
|
|
|
case TR:
|
|
|
|
return child != TR;
|
|
|
|
|
|
|
|
case TD:
|
|
|
|
case TH:
|
|
|
|
return child != TD && child != TH && child != TR;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2018-06-11 22:12:01 +00:00
|
|
|
|
2018-06-12 20:49:54 +00:00
|
|
|
static inline Tag for_name(const string &name) {
|
2018-06-15 22:32:21 +00:00
|
|
|
unordered_map<string, TagType>::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name);
|
2018-06-11 22:12:01 +00:00
|
|
|
if (type != TAG_TYPES_BY_TAG_NAME.end()) {
|
2018-06-15 22:33:32 +00:00
|
|
|
return Tag(type->second, string());
|
|
|
|
} else {
|
|
|
|
return Tag(CUSTOM, name);
|
2018-06-11 22:12:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|