From 5def8f2a865e08057a1416f5766a87a9d63874f2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Jun 2018 15:28:25 -0700 Subject: [PATCH 1/6] Use string methods for copying bytes, not loops --- src/scanner.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 8421e50..c49858d 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -32,9 +32,8 @@ struct Scanner { if (tag.type == CUSTOM) { buffer[i++] = tag.custom_tag_name.size(); - for (char c : tag.custom_tag_name) { - buffer[i++] = c; - } + tag.custom_tag_name.copy(&buffer[i], tag.custom_tag_name.size()); + i += tag.custom_tag_name.size(); } } @@ -49,10 +48,9 @@ struct Scanner { Tag tag { static_cast(buffer[i]), "" }; i++; if (tag.type == CUSTOM) { - tag.custom_tag_name.resize(buffer[i++]); - for (unsigned j = 0; j < tag.custom_tag_name.size(); j++) { - tag.custom_tag_name[j] = buffer[i++]; - } + unsigned length = buffer[i++]; + tag.custom_tag_name.assign(&buffer[i], &buffer[i + length]); + i += length; } tags.push_back(tag); } From 6923b13394673f256c55830d19656f57be8bcb81 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Jun 2018 15:30:43 -0700 Subject: [PATCH 2/6] Remove one range-based for loop --- src/scanner.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scanner.cc b/src/scanner.cc index c49858d..2af89a4 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -26,7 +26,9 @@ struct Scanner { unsigned serialize(char *buffer) { unsigned i = 0; - for (Tag &tag : tags) { + + for (unsigned j = 0, n = tags.size(); j < n; j++) { + Tag &tag = tags[j]; buffer[i] = static_cast(tag.type); i++; From 8afd99e35e7a1c72c5fdb06b0fefdb8190944580 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Jun 2018 15:32:21 -0700 Subject: [PATCH 3/6] Use explicit types instead of auto --- src/scanner.cc | 12 +++++------- src/tag.h | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 2af89a4..6458031 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -74,9 +74,8 @@ struct Scanner { lexer->advance(lexer, false); unsigned dashes = 0; - auto c = lexer->lookahead; - while (c) { - switch (c) { + while (lexer->lookahead) { + switch (lexer->lookahead) { case '-': ++dashes; break; @@ -92,7 +91,6 @@ struct Scanner { dashes = 0; } lexer->advance(lexer, false); - c = lexer->lookahead; } return false; } @@ -137,7 +135,7 @@ struct Scanner { } } - auto tag_name = scan_tag_name(lexer); + string tag_name = scan_tag_name(lexer); if (tag_name.empty()) return false; Tag next_tag = Tag::for_name(tag_name); @@ -163,7 +161,7 @@ struct Scanner { } bool scan_start_tag_name(TSLexer *lexer) { - auto tag_name = scan_tag_name(lexer); + string tag_name = scan_tag_name(lexer); if (tag_name.empty()) return false; Tag tag = Tag::for_name(tag_name); tags.push_back(tag); @@ -176,7 +174,7 @@ struct Scanner { } bool scan_end_tag_name(TSLexer *lexer) { - auto tag_name = scan_tag_name(lexer); + string tag_name = scan_tag_name(lexer); if (tag_name.empty()) return false; Tag tag = Tag::for_name(tag_name); if (!tags.empty() && tags.back() == tag) { diff --git a/src/tag.h b/src/tag.h index 83c6df8..8eb1046 100644 --- a/src/tag.h +++ b/src/tag.h @@ -358,7 +358,7 @@ struct Tag { } static inline Tag for_name(const string &name) { - auto type = TAG_TYPES_BY_TAG_NAME.find(name); + unordered_map::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name); if (type != TAG_TYPES_BY_TAG_NAME.end()) { return Tag { type->second, "" }; } From 3fce691faeb8e158042d3e6b8dc6fd8ce8c74c62 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Jun 2018 15:33:32 -0700 Subject: [PATCH 4/6] Use a regular constructor instead of aggregate initialization --- src/scanner.cc | 2 +- src/tag.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 6458031..058e48f 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -47,7 +47,7 @@ struct Scanner { unsigned i = 0; while (i < length) { - Tag tag { static_cast(buffer[i]), "" }; + Tag tag(static_cast(buffer[i]), ""); i++; if (tag.type == CUSTOM) { unsigned length = buffer[i++]; diff --git a/src/tag.h b/src/tag.h index 8eb1046..c4f48fb 100644 --- a/src/tag.h +++ b/src/tag.h @@ -303,6 +303,8 @@ struct Tag { TagType type; string custom_tag_name; + Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {} + bool operator==(const Tag &other) const { if (type != other.type) return false; if (type == TagType::CUSTOM && custom_tag_name != other.custom_tag_name) return false; @@ -360,8 +362,9 @@ struct Tag { static inline Tag for_name(const string &name) { unordered_map::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name); if (type != TAG_TYPES_BY_TAG_NAME.end()) { - return Tag { type->second, "" }; + return Tag(type->second, string()); + } else { + return Tag(CUSTOM, name); } - return Tag { CUSTOM, name }; } }; From 8e6d71bd5480f1b933c27188582839c7e8ea0982 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Jun 2018 15:34:03 -0700 Subject: [PATCH 5/6] Don't use fancy enum features --- src/tag.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tag.h b/src/tag.h index c4f48fb..e367529 100644 --- a/src/tag.h +++ b/src/tag.h @@ -4,7 +4,7 @@ using std::string; using std::unordered_map; -enum TagType : char { +enum TagType { AREA, BASE, BASEFONT, @@ -307,7 +307,7 @@ struct Tag { bool operator==(const Tag &other) const { if (type != other.type) return false; - if (type == TagType::CUSTOM && custom_tag_name != other.custom_tag_name) return false; + if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false; return true; } From 2a3ef3b7b1f1070b74818ce22c0651b18ab72d59 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 15 Jun 2018 15:34:56 -0700 Subject: [PATCH 6/6] Build tag map with a static method instead of an initializer list --- src/tag.h | 261 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 134 insertions(+), 127 deletions(-) diff --git a/src/tag.h b/src/tag.h index e367529..0dc4440 100644 --- a/src/tag.h +++ b/src/tag.h @@ -136,133 +136,140 @@ enum TagType { CUSTOM, }; -static const unordered_map TAG_TYPES_BY_TAG_NAME = { - {"AREA", AREA}, - {"BASE", BASE}, - {"BASEFONT", BASEFONT}, - {"BGSOUND", BGSOUND}, - {"BR", BR}, - {"COL", COL}, - {"COMMAND", COMMAND}, - {"EMBED", EMBED}, - {"FRAME", FRAME}, - {"HR", HR}, - {"IMAGE", IMAGE}, - {"IMG", IMG}, - {"INPUT", INPUT}, - {"ISINDEX", ISINDEX}, - {"KEYGEN", KEYGEN}, - {"LINK", LINK}, - {"MENUITEM", MENUITEM}, - {"META", META}, - {"NEXTID", NEXTID}, - {"PARAM", PARAM}, - {"SOURCE", SOURCE}, - {"TRACK", TRACK}, - {"WBR", WBR}, - {"A", A}, - {"ABBR", ABBR}, - {"ADDRESS", ADDRESS}, - {"ARTICLE", ARTICLE}, - {"ASIDE", ASIDE}, - {"AUDIO", AUDIO}, - {"B", B}, - {"BDI", BDI}, - {"BDO", BDO}, - {"BLOCKQUOTE", BLOCKQUOTE}, - {"BODY", BODY}, - {"BUTTON", BUTTON}, - {"CANVAS", CANVAS}, - {"CAPTION", CAPTION}, - {"CITE", CITE}, - {"CODE", CODE}, - {"COLGROUP", COLGROUP}, - {"DATA", DATA}, - {"DATALIST", DATALIST}, - {"DD", DD}, - {"DEL", DEL}, - {"DETAILS", DETAILS}, - {"DFN", DFN}, - {"DIALOG", DIALOG}, - {"DIV", DIV}, - {"DL", DL}, - {"DT", DT}, - {"EM", EM}, - {"FIELDSET", FIELDSET}, - {"FIGCAPTION", FIGCAPTION}, - {"FIGURE", FIGURE}, - {"FOOTER", FOOTER}, - {"FORM", FORM}, - {"H1", H1}, - {"H2", H2}, - {"H3", H3}, - {"H4", H4}, - {"H5", H5}, - {"H6", H6}, - {"HEAD", HEAD}, - {"HEADER", HEADER}, - {"HGROUP", HGROUP}, - {"HTML", HTML}, - {"I", I}, - {"IFRAME", IFRAME}, - {"INS", INS}, - {"KBD", KBD}, - {"LABEL", LABEL}, - {"LEGEND", LEGEND}, - {"LI", LI}, - {"MAIN", MAIN}, - {"MAP", MAP}, - {"MARK", MARK}, - {"MATH", MATH}, - {"MENU", MENU}, - {"METER", METER}, - {"NAV", NAV}, - {"NOSCRIPT", NOSCRIPT}, - {"OBJECT", OBJECT}, - {"OL", OL}, - {"OPTGROUP", OPTGROUP}, - {"OPTION", OPTION}, - {"OUTPUT", OUTPUT}, - {"P", P}, - {"PICTURE", PICTURE}, - {"PRE", PRE}, - {"PROGRESS", PROGRESS}, - {"Q", Q}, - {"RB", RB}, - {"RP", RP}, - {"RT", RT}, - {"RTC", RTC}, - {"RUBY", RUBY}, - {"S", S}, - {"SAMP", SAMP}, - {"SCRIPT", SCRIPT}, - {"SECTION", SECTION}, - {"SELECT", SELECT}, - {"SLOT", SLOT}, - {"SMALL", SMALL}, - {"SPAN", SPAN}, - {"STRONG", STRONG}, - {"STYLE", STYLE}, - {"SUB", SUB}, - {"SUMMARY", SUMMARY}, - {"SUP", SUP}, - {"SVG", SVG}, - {"TABLE", TABLE}, - {"TBODY", TBODY}, - {"TD", TD}, - {"TEMPLATE", TEMPLATE}, - {"TEXTAREA", TEXTAREA}, - {"TFOOT", TFOOT}, - {"TH", TH}, - {"THEAD", THEAD}, - {"TIME", TIME}, - {"TITLE", TITLE}, - {"TR", TR}, - {"U", U}, - {"UL", UL}, - {"VAR", VAR}, - {"VIDEO", VIDEO}, -}; + +static const unordered_map get_tag_map() { + unordered_map result; +#define TAG(name) result[#name] = name + TAG(AREA); + TAG(BASE); + TAG(BASEFONT); + TAG(BGSOUND); + TAG(BR); + TAG(COL); + TAG(COMMAND); + TAG(EMBED); + TAG(FRAME); + TAG(HR); + TAG(IMAGE); + TAG(IMG); + TAG(INPUT); + TAG(ISINDEX); + TAG(KEYGEN); + TAG(LINK); + TAG(MENUITEM); + TAG(META); + TAG(NEXTID); + TAG(PARAM); + TAG(SOURCE); + TAG(TRACK); + TAG(WBR); + TAG(A); + TAG(ABBR); + TAG(ADDRESS); + TAG(ARTICLE); + TAG(ASIDE); + TAG(AUDIO); + TAG(B); + TAG(BDI); + TAG(BDO); + TAG(BLOCKQUOTE); + TAG(BODY); + TAG(BUTTON); + TAG(CANVAS); + TAG(CAPTION); + TAG(CITE); + TAG(CODE); + TAG(COLGROUP); + TAG(DATA); + TAG(DATALIST); + TAG(DD); + TAG(DEL); + TAG(DETAILS); + TAG(DFN); + TAG(DIALOG); + TAG(DIV); + TAG(DL); + TAG(DT); + TAG(EM); + TAG(FIELDSET); + TAG(FIGCAPTION); + TAG(FIGURE); + TAG(FOOTER); + TAG(FORM); + TAG(H1); + TAG(H2); + TAG(H3); + TAG(H4); + TAG(H5); + TAG(H6); + TAG(HEAD); + TAG(HEADER); + TAG(HGROUP); + TAG(HTML); + TAG(I); + TAG(IFRAME); + TAG(INS); + TAG(KBD); + TAG(LABEL); + TAG(LEGEND); + TAG(LI); + TAG(MAIN); + TAG(MAP); + TAG(MARK); + TAG(MATH); + TAG(MENU); + TAG(METER); + TAG(NAV); + TAG(NOSCRIPT); + TAG(OBJECT); + TAG(OL); + TAG(OPTGROUP); + TAG(OPTION); + TAG(OUTPUT); + TAG(P); + TAG(PICTURE); + TAG(PRE); + TAG(PROGRESS); + TAG(Q); + TAG(RB); + TAG(RP); + TAG(RT); + TAG(RTC); + TAG(RUBY); + TAG(S); + TAG(SAMP); + TAG(SCRIPT); + TAG(SECTION); + TAG(SELECT); + TAG(SLOT); + TAG(SMALL); + TAG(SPAN); + TAG(STRONG); + TAG(STYLE); + TAG(SUB); + TAG(SUMMARY); + TAG(SUP); + TAG(SVG); + TAG(TABLE); + TAG(TBODY); + TAG(TD); + TAG(TEMPLATE); + TAG(TEXTAREA); + TAG(TFOOT); + TAG(TH); + TAG(THEAD); + TAG(TIME); + TAG(TITLE); + TAG(TR); + TAG(U); + TAG(UL); + TAG(VAR); + TAG(VIDEO); +#undef TAG + return result; +} + +static const unordered_map TAG_TYPES_BY_TAG_NAME = get_tag_map(); static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = { ADDRESS,