2018-06-11 22:12:01 +00:00
|
|
|
#include <string>
|
2018-06-18 17:05:44 +00:00
|
|
|
#include <map>
|
2018-06-11 22:12:01 +00:00
|
|
|
|
|
|
|
using std::string;
|
2018-06-18 17:05:44 +00:00
|
|
|
using std::map;
|
2018-06-11 22:12:01 +00:00
|
|
|
|
2018-06-15 22:34:03 +00:00
|
|
|
enum TagType {
|
2018-06-11 22:12:01 +00:00
|
|
|
AREA,
|
|
|
|
BASE,
|
|
|
|
BASEFONT,
|
|
|
|
BGSOUND,
|
|
|
|
BR,
|
|
|
|
COL,
|
|
|
|
COMMAND,
|
|
|
|
EMBED,
|
|
|
|
FRAME,
|
|
|
|
HR,
|
|
|
|
IMAGE,
|
|
|
|
IMG,
|
|
|
|
INPUT,
|
|
|
|
ISINDEX,
|
|
|
|
KEYGEN,
|
|
|
|
LINK,
|
|
|
|
MENUITEM,
|
|
|
|
META,
|
|
|
|
NEXTID,
|
|
|
|
PARAM,
|
|
|
|
SOURCE,
|
|
|
|
TRACK,
|
|
|
|
WBR,
|
|
|
|
END_OF_VOID_TAGS,
|
|
|
|
|
|
|
|
A,
|
|
|
|
ABBR,
|
|
|
|
ADDRESS,
|
|
|
|
ARTICLE,
|
|
|
|
ASIDE,
|
|
|
|
AUDIO,
|
|
|
|
B,
|
|
|
|
BDI,
|
|
|
|
BDO,
|
|
|
|
BLOCKQUOTE,
|
|
|
|
BODY,
|
|
|
|
BUTTON,
|
|
|
|
CANVAS,
|
|
|
|
CAPTION,
|
|
|
|
CITE,
|
|
|
|
CODE,
|
|
|
|
COLGROUP,
|
|
|
|
DATA,
|
|
|
|
DATALIST,
|
|
|
|
DD,
|
|
|
|
DEL,
|
|
|
|
DETAILS,
|
|
|
|
DFN,
|
|
|
|
DIALOG,
|
|
|
|
DIV,
|
|
|
|
DL,
|
|
|
|
DT,
|
|
|
|
EM,
|
|
|
|
FIELDSET,
|
|
|
|
FIGCAPTION,
|
|
|
|
FIGURE,
|
|
|
|
FOOTER,
|
|
|
|
FORM,
|
|
|
|
H1,
|
|
|
|
H2,
|
|
|
|
H3,
|
|
|
|
H4,
|
|
|
|
H5,
|
|
|
|
H6,
|
|
|
|
HEAD,
|
|
|
|
HEADER,
|
|
|
|
HGROUP,
|
|
|
|
HTML,
|
|
|
|
I,
|
|
|
|
IFRAME,
|
|
|
|
INS,
|
|
|
|
KBD,
|
|
|
|
LABEL,
|
|
|
|
LEGEND,
|
|
|
|
LI,
|
|
|
|
MAIN,
|
|
|
|
MAP,
|
|
|
|
MARK,
|
|
|
|
MATH,
|
|
|
|
MENU,
|
|
|
|
METER,
|
|
|
|
NAV,
|
|
|
|
NOSCRIPT,
|
|
|
|
OBJECT,
|
|
|
|
OL,
|
|
|
|
OPTGROUP,
|
|
|
|
OPTION,
|
|
|
|
OUTPUT,
|
|
|
|
P,
|
|
|
|
PICTURE,
|
|
|
|
PRE,
|
|
|
|
PROGRESS,
|
|
|
|
Q,
|
|
|
|
RB,
|
|
|
|
RP,
|
|
|
|
RT,
|
|
|
|
RTC,
|
|
|
|
RUBY,
|
|
|
|
S,
|
|
|
|
SAMP,
|
|
|
|
SCRIPT,
|
|
|
|
SECTION,
|
|
|
|
SELECT,
|
|
|
|
SLOT,
|
|
|
|
SMALL,
|
|
|
|
SPAN,
|
|
|
|
STRONG,
|
|
|
|
STYLE,
|
|
|
|
SUB,
|
|
|
|
SUMMARY,
|
|
|
|
SUP,
|
|
|
|
SVG,
|
|
|
|
TABLE,
|
|
|
|
TBODY,
|
|
|
|
TD,
|
|
|
|
TEMPLATE,
|
|
|
|
TEXTAREA,
|
|
|
|
TFOOT,
|
|
|
|
TH,
|
|
|
|
THEAD,
|
|
|
|
TIME,
|
|
|
|
TITLE,
|
|
|
|
TR,
|
|
|
|
U,
|
|
|
|
UL,
|
|
|
|
VAR,
|
|
|
|
VIDEO,
|
|
|
|
|
|
|
|
CUSTOM,
|
|
|
|
};
|
|
|
|
|
2018-06-15 22:34:56 +00:00
|
|
|
|
2018-06-18 17:05:44 +00:00
|
|
|
static const map<string, TagType> get_tag_map() {
|
|
|
|
map<string, TagType> result;
|
2018-06-15 22:34:56 +00:00
|
|
|
#define TAG(name) result[#name] = name
|
|
|
|
TAG(AREA);
|
|
|
|
TAG(BASE);
|
|
|
|
TAG(BASEFONT);
|
|
|
|
TAG(BGSOUND);
|
|
|
|
TAG(BR);
|
|
|
|
TAG(COL);
|
|
|
|
TAG(COMMAND);
|
|
|
|
TAG(EMBED);
|
|
|
|
TAG(FRAME);
|
|
|
|
TAG(HR);
|
|
|
|
TAG(IMAGE);
|
|
|
|
TAG(IMG);
|
|
|
|
TAG(INPUT);
|
|
|
|
TAG(ISINDEX);
|
|
|
|
TAG(KEYGEN);
|
|
|
|
TAG(LINK);
|
|
|
|
TAG(MENUITEM);
|
|
|
|
TAG(META);
|
|
|
|
TAG(NEXTID);
|
|
|
|
TAG(PARAM);
|
|
|
|
TAG(SOURCE);
|
|
|
|
TAG(TRACK);
|
|
|
|
TAG(WBR);
|
|
|
|
TAG(A);
|
|
|
|
TAG(ABBR);
|
|
|
|
TAG(ADDRESS);
|
|
|
|
TAG(ARTICLE);
|
|
|
|
TAG(ASIDE);
|
|
|
|
TAG(AUDIO);
|
|
|
|
TAG(B);
|
|
|
|
TAG(BDI);
|
|
|
|
TAG(BDO);
|
|
|
|
TAG(BLOCKQUOTE);
|
|
|
|
TAG(BODY);
|
|
|
|
TAG(BUTTON);
|
|
|
|
TAG(CANVAS);
|
|
|
|
TAG(CAPTION);
|
|
|
|
TAG(CITE);
|
|
|
|
TAG(CODE);
|
|
|
|
TAG(COLGROUP);
|
|
|
|
TAG(DATA);
|
|
|
|
TAG(DATALIST);
|
|
|
|
TAG(DD);
|
|
|
|
TAG(DEL);
|
|
|
|
TAG(DETAILS);
|
|
|
|
TAG(DFN);
|
|
|
|
TAG(DIALOG);
|
|
|
|
TAG(DIV);
|
|
|
|
TAG(DL);
|
|
|
|
TAG(DT);
|
|
|
|
TAG(EM);
|
|
|
|
TAG(FIELDSET);
|
|
|
|
TAG(FIGCAPTION);
|
|
|
|
TAG(FIGURE);
|
|
|
|
TAG(FOOTER);
|
|
|
|
TAG(FORM);
|
|
|
|
TAG(H1);
|
|
|
|
TAG(H2);
|
|
|
|
TAG(H3);
|
|
|
|
TAG(H4);
|
|
|
|
TAG(H5);
|
|
|
|
TAG(H6);
|
|
|
|
TAG(HEAD);
|
|
|
|
TAG(HEADER);
|
|
|
|
TAG(HGROUP);
|
|
|
|
TAG(HTML);
|
|
|
|
TAG(I);
|
|
|
|
TAG(IFRAME);
|
|
|
|
TAG(INS);
|
|
|
|
TAG(KBD);
|
|
|
|
TAG(LABEL);
|
|
|
|
TAG(LEGEND);
|
|
|
|
TAG(LI);
|
|
|
|
TAG(MAIN);
|
|
|
|
TAG(MAP);
|
|
|
|
TAG(MARK);
|
|
|
|
TAG(MATH);
|
|
|
|
TAG(MENU);
|
|
|
|
TAG(METER);
|
|
|
|
TAG(NAV);
|
|
|
|
TAG(NOSCRIPT);
|
|
|
|
TAG(OBJECT);
|
|
|
|
TAG(OL);
|
|
|
|
TAG(OPTGROUP);
|
|
|
|
TAG(OPTION);
|
|
|
|
TAG(OUTPUT);
|
|
|
|
TAG(P);
|
|
|
|
TAG(PICTURE);
|
|
|
|
TAG(PRE);
|
|
|
|
TAG(PROGRESS);
|
|
|
|
TAG(Q);
|
|
|
|
TAG(RB);
|
|
|
|
TAG(RP);
|
|
|
|
TAG(RT);
|
|
|
|
TAG(RTC);
|
|
|
|
TAG(RUBY);
|
|
|
|
TAG(S);
|
|
|
|
TAG(SAMP);
|
|
|
|
TAG(SCRIPT);
|
|
|
|
TAG(SECTION);
|
|
|
|
TAG(SELECT);
|
|
|
|
TAG(SLOT);
|
|
|
|
TAG(SMALL);
|
|
|
|
TAG(SPAN);
|
|
|
|
TAG(STRONG);
|
|
|
|
TAG(STYLE);
|
|
|
|
TAG(SUB);
|
|
|
|
TAG(SUMMARY);
|
|
|
|
TAG(SUP);
|
|
|
|
TAG(SVG);
|
|
|
|
TAG(TABLE);
|
|
|
|
TAG(TBODY);
|
|
|
|
TAG(TD);
|
|
|
|
TAG(TEMPLATE);
|
|
|
|
TAG(TEXTAREA);
|
|
|
|
TAG(TFOOT);
|
|
|
|
TAG(TH);
|
|
|
|
TAG(THEAD);
|
|
|
|
TAG(TIME);
|
|
|
|
TAG(TITLE);
|
|
|
|
TAG(TR);
|
|
|
|
TAG(U);
|
|
|
|
TAG(UL);
|
|
|
|
TAG(VAR);
|
|
|
|
TAG(VIDEO);
|
|
|
|
#undef TAG
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-06-18 17:05:44 +00:00
|
|
|
static const map<string, TagType> TAG_TYPES_BY_TAG_NAME = get_tag_map();
|
2018-06-11 22:12:01 +00:00
|
|
|
|
2018-06-12 20:49:54 +00:00
|
|
|
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
|
|
|
|
ADDRESS,
|
|
|
|
ARTICLE,
|
|
|
|
ASIDE,
|
|
|
|
BLOCKQUOTE,
|
|
|
|
DETAILS,
|
|
|
|
DIV,
|
|
|
|
DL,
|
|
|
|
FIELDSET,
|
|
|
|
FIGCAPTION,
|
|
|
|
FIGURE,
|
|
|
|
FOOTER,
|
|
|
|
FORM,
|
|
|
|
H1,
|
|
|
|
H2,
|
|
|
|
H3,
|
|
|
|
H4,
|
|
|
|
H5,
|
|
|
|
H6,
|
|
|
|
HEADER,
|
|
|
|
HR,
|
|
|
|
MAIN,
|
|
|
|
NAV,
|
|
|
|
OL,
|
|
|
|
P,
|
|
|
|
PRE,
|
|
|
|
SECTION,
|
2018-06-12 17:51:03 +00:00
|
|
|
};
|
|
|
|
|
2018-06-12 20:49:54 +00:00
|
|
|
static const TagType *TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END = (
|
|
|
|
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS +
|
|
|
|
sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) /
|
|
|
|
sizeof(TagType)
|
|
|
|
);
|
|
|
|
|
2018-06-11 22:12:01 +00:00
|
|
|
struct Tag {
|
|
|
|
TagType type;
|
|
|
|
string custom_tag_name;
|
|
|
|
|
2018-08-29 18:01:00 +00:00
|
|
|
// This default constructor is used in the case where there is not enough space
|
|
|
|
// in the serialization buffer to store all of the tags. In that case, tags
|
|
|
|
// that cannot be serialized will be treated as having an unknown type. These
|
|
|
|
// tags will be closed via implicit end tags regardless of the next closing
|
|
|
|
// tag is encountered.
|
|
|
|
Tag() : type(END_OF_VOID_TAGS) {}
|
|
|
|
|
2018-06-15 22:33:32 +00:00
|
|
|
Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {}
|
|
|
|
|
2018-06-11 22:12:01 +00:00
|
|
|
bool operator==(const Tag &other) const {
|
|
|
|
if (type != other.type) return false;
|
2018-06-15 22:34:03 +00:00
|
|
|
if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false;
|
2018-06-11 22:12:01 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool is_void() const {
|
|
|
|
return type < END_OF_VOID_TAGS;
|
|
|
|
}
|
|
|
|
|
2018-06-12 17:51:03 +00:00
|
|
|
inline bool can_contain(const Tag &tag) {
|
|
|
|
TagType child = tag.type;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case LI: return child != LI;
|
|
|
|
|
|
|
|
case DT:
|
|
|
|
case DD:
|
|
|
|
return child != DT && child != DD;
|
|
|
|
|
|
|
|
case P:
|
2018-06-12 20:49:54 +00:00
|
|
|
return std::find(
|
|
|
|
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS,
|
|
|
|
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END,
|
|
|
|
tag.type
|
|
|
|
) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END;
|
2018-06-12 17:51:03 +00:00
|
|
|
|
|
|
|
case COLGROUP:
|
|
|
|
return child == COL;
|
|
|
|
|
|
|
|
case RB:
|
|
|
|
case RT:
|
|
|
|
case RP:
|
|
|
|
return child != RB && child != RT && child != RP;
|
|
|
|
|
|
|
|
case OPTGROUP:
|
|
|
|
return child != OPTGROUP;
|
|
|
|
|
|
|
|
case TR:
|
|
|
|
return child != TR;
|
|
|
|
|
|
|
|
case TD:
|
|
|
|
case TH:
|
|
|
|
return child != TD && child != TH && child != TR;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2018-06-11 22:12:01 +00:00
|
|
|
|
2018-06-12 20:49:54 +00:00
|
|
|
static inline Tag for_name(const string &name) {
|
2018-06-18 17:05:44 +00:00
|
|
|
map<string, TagType>::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name);
|
2018-06-11 22:12:01 +00:00
|
|
|
if (type != TAG_TYPES_BY_TAG_NAME.end()) {
|
2018-06-15 22:33:32 +00:00
|
|
|
return Tag(type->second, string());
|
|
|
|
} else {
|
|
|
|
return Tag(CUSTOM, name);
|
2018-06-11 22:12:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|