From e56df0fc7f9308323d28db1a175c29c525d07e09 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 11 Jun 2018 15:36:18 -0700 Subject: [PATCH] Add comments, allow slashes in unquoted attribute values Co-Authored-By: Ashi Krishan --- corpus/main.txt | 16 + grammar.js | 14 +- src/grammar.json | 34 +- src/parser.c | 930 +++++++++++++++++++++++++---------------------- src/scanner.cc | 48 ++- test.html | 1 - 6 files changed, 591 insertions(+), 452 deletions(-) delete mode 100644 test.html diff --git a/corpus/main.txt b/corpus/main.txt index e2f06db..be9be4f 100644 --- a/corpus/main.txt +++ b/corpus/main.txt @@ -71,3 +71,19 @@ Void tags (attribute (attribute_name) (attribute_value)) (attribute (attribute_name) (attribute_value)))) (end_tag))) + +================================== +Comments +================================== + +
+ +
+--- + +(fragment + (comment) + (element + (start_tag) + (comment) + (end_tag))) diff --git a/grammar.js b/grammar.js index e696dbd..536ce49 100644 --- a/grammar.js +++ b/grammar.js @@ -1,6 +1,11 @@ module.exports = grammar({ name: 'html', + extras: $ => [ + $.comment, + /\s+/, + ], + externals: $ => [ $._open_start_tag, $._close_start_tag, @@ -8,6 +13,7 @@ module.exports = grammar({ $.end_tag, $._implicit_end_tag, $._erroneous_end_tag, + $.comment, ], rules: { @@ -41,17 +47,19 @@ module.exports = grammar({ ), attribute: $ => seq( - alias($._attribute_part, $.attribute_name), + $.attribute_name, optional(seq( '=', choice( - alias($._attribute_part, $.attribute_value), + $.attribute_value, $.quoted_attribute_value ) )) ), - _attribute_part: $ => /[^<>"'/=\s]+/, + attribute_name: $ => /[^<>"'/=\s]+/, + + attribute_value: $ => /[^<>"'=\s]+/, quoted_attribute_value: $ => choice( seq("'", optional(alias(/[^']+/, $.attribute_value)), "'"), diff --git a/src/grammar.json b/src/grammar.json index 5ca074d..542482d 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -107,13 +107,8 @@ "type": "SEQ", "members": [ { - "type": "ALIAS", - "content": { - "type": "SYMBOL", - "name": "_attribute_part" - }, - "named": true, - "value": "attribute_name" + "type": "SYMBOL", + "name": "attribute_name" }, { "type": "CHOICE", @@ -129,13 +124,8 @@ "type": "CHOICE", "members": [ { - "type": "ALIAS", - "content": { - "type": "SYMBOL", - "name": "_attribute_part" - }, - "named": true, - "value": "attribute_value" + "type": "SYMBOL", + "name": "attribute_value" }, { "type": "SYMBOL", @@ -152,10 +142,14 @@ } ] }, - "_attribute_part": { + "attribute_name": { "type": "PATTERN", "value": "[^<>\"'\\/=\\s]+" }, + "attribute_value": { + "type": "PATTERN", + "value": "[^<>\"'=\\s]+" + }, "quoted_attribute_value": { "type": "CHOICE", "members": [ @@ -227,9 +221,13 @@ } }, "extras": [ + { + "type": "SYMBOL", + "name": "comment" + }, { "type": "PATTERN", - "value": "\\s" + "value": "\\s+" } ], "conflicts": [], @@ -257,6 +255,10 @@ { "type": "SYMBOL", "name": "_erroneous_end_tag" + }, + { + "type": "SYMBOL", + "name": "comment" } ], "inline": [] diff --git a/src/parser.c b/src/parser.c index a874a2c..73efc11 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,11 +6,11 @@ #endif #define LANGUAGE_VERSION 8 -#define STATE_COUNT 39 -#define SYMBOL_COUNT 23 -#define ALIAS_COUNT 2 -#define TOKEN_COUNT 14 -#define EXTERNAL_TOKEN_COUNT 6 +#define STATE_COUNT 36 +#define SYMBOL_COUNT 25 +#define ALIAS_COUNT 1 +#define TOKEN_COUNT 16 +#define EXTERNAL_TOKEN_COUNT 7 #define MAX_ALIAS_SEQUENCE_LENGTH 3 enum { @@ -20,24 +20,25 @@ enum { sym_end_tag = 4, sym__implicit_end_tag = 5, sym__erroneous_end_tag = 6, - anon_sym_EQ = 7, - sym__attribute_part = 8, - anon_sym_SQUOTE = 9, - aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH = 10, - anon_sym_DQUOTE = 11, - aux_sym_SLASH_LBRACK_CARET_DQUOTE_RBRACK_PLUS_SLASH = 12, - sym_text = 13, - sym_fragment = 14, - sym__node = 15, - sym_element = 16, - sym_start_tag = 17, - sym_self_closing_tag = 18, - sym_attribute = 19, - sym_quoted_attribute_value = 20, - aux_sym_fragment_repeat1 = 21, - aux_sym_start_tag_repeat1 = 22, - alias_sym_attribute_name = 23, - alias_sym_attribute_value = 24, + sym_comment = 7, + anon_sym_EQ = 8, + sym_attribute_name = 9, + sym_attribute_value = 10, + anon_sym_SQUOTE = 11, + aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH = 12, + anon_sym_DQUOTE = 13, + aux_sym_SLASH_LBRACK_CARET_DQUOTE_RBRACK_PLUS_SLASH = 14, + sym_text = 15, + sym_fragment = 16, + sym__node = 17, + sym_element = 18, + sym_start_tag = 19, + sym_self_closing_tag = 20, + sym_attribute = 21, + sym_quoted_attribute_value = 22, + aux_sym_fragment_repeat1 = 23, + aux_sym_start_tag_repeat1 = 24, + alias_sym_attribute_value = 25, }; static const char *ts_symbol_names[] = { @@ -47,9 +48,11 @@ static const char *ts_symbol_names[] = { [sym_end_tag] = "end_tag", [sym__implicit_end_tag] = "_implicit_end_tag", [sym__erroneous_end_tag] = "_erroneous_end_tag", + [sym_comment] = "comment", [ts_builtin_sym_end] = "END", [anon_sym_EQ] = "=", - [sym__attribute_part] = "_attribute_part", + [sym_attribute_name] = "attribute_name", + [sym_attribute_value] = "attribute_value", [anon_sym_SQUOTE] = "'", [aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH] = "/[^']+/", [anon_sym_DQUOTE] = "\"", @@ -64,7 +67,6 @@ static const char *ts_symbol_names[] = { [sym_quoted_attribute_value] = "quoted_attribute_value", [aux_sym_fragment_repeat1] = "fragment_repeat1", [aux_sym_start_tag_repeat1] = "start_tag_repeat1", - [alias_sym_attribute_name] = "attribute_name", [alias_sym_attribute_value] = "attribute_value", }; @@ -93,6 +95,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = false, .named = true, }, + [sym_comment] = { + .visible = true, + .named = true, + }, [ts_builtin_sym_end] = { .visible = false, .named = true, @@ -101,8 +107,12 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = false, }, - [sym__attribute_part] = { - .visible = false, + [sym_attribute_name] = { + .visible = true, + .named = true, + }, + [sym_attribute_value] = { + .visible = true, .named = true, }, [anon_sym_SQUOTE] = { @@ -161,25 +171,14 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = false, .named = false, }, - [alias_sym_attribute_name] = { - .visible = true, - .named = true, - }, [alias_sym_attribute_value] = { .visible = true, .named = true, }, }; -static TSSymbol ts_alias_sequences[4][MAX_ALIAS_SEQUENCE_LENGTH] = { +static TSSymbol ts_alias_sequences[2][MAX_ALIAS_SEQUENCE_LENGTH] = { [1] = { - [0] = alias_sym_attribute_name, - }, - [2] = { - [0] = alias_sym_attribute_name, - [2] = alias_sym_attribute_value, - }, - [3] = { [1] = alias_sym_attribute_value, }, }; @@ -194,16 +193,17 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { ADVANCE(2); if (lookahead == '\'') ADVANCE(3); - if (lookahead == '=') + if (lookahead == '/') ADVANCE(4); + if (lookahead == '=') + ADVANCE(5); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(0); - if (lookahead != '/' && - (lookahead < '<' || lookahead > '>')) - ADVANCE(5); + if ((lookahead < '<' || lookahead > '>')) + ADVANCE(6); END_STATE(); case 1: ACCEPT_TOKEN(ts_builtin_sym_end); @@ -215,10 +215,80 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { ACCEPT_TOKEN(anon_sym_SQUOTE); END_STATE(); case 4: - ACCEPT_TOKEN(anon_sym_EQ); + ACCEPT_TOKEN(sym_attribute_value); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ' && + lookahead != '\"' && + lookahead != '\'' && + (lookahead < '<' || lookahead > '>')) + ADVANCE(4); END_STATE(); case 5: - ACCEPT_TOKEN(sym__attribute_part); + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 6: + ACCEPT_TOKEN(sym_attribute_name); + if (lookahead == '/') + ADVANCE(4); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ' && + lookahead != '\"' && + lookahead != '\'' && + (lookahead < '<' || lookahead > '>')) + ADVANCE(6); + END_STATE(); + case 7: + if (lookahead == 0) + ADVANCE(1); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(8); + if (lookahead != '<' && + lookahead != '>') + ADVANCE(9); + END_STATE(); + case 8: + ACCEPT_TOKEN(sym_text); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(8); + if (lookahead != 0 && + lookahead != '<' && + lookahead != '>') + ADVANCE(9); + END_STATE(); + case 9: + ACCEPT_TOKEN(sym_text); + if (lookahead != 0 && + lookahead != '<' && + lookahead != '>') + ADVANCE(9); + END_STATE(); + case 10: + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(10); + if (lookahead != 0 && + lookahead != '\"' && + lookahead != '\'' && + lookahead != '/' && + (lookahead < '<' || lookahead > '>')) + ADVANCE(11); + END_STATE(); + case 11: + ACCEPT_TOKEN(sym_attribute_name); if (lookahead != 0 && lookahead != '\t' && lookahead != '\n' && @@ -228,147 +298,119 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead != '\'' && lookahead != '/' && (lookahead < '<' || lookahead > '>')) - ADVANCE(5); - END_STATE(); - case 6: - if (lookahead == 0) - ADVANCE(1); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') - ADVANCE(7); - if (lookahead != '<' && - lookahead != '>') - ADVANCE(8); - END_STATE(); - case 7: - ACCEPT_TOKEN(sym_text); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') - ADVANCE(7); - if (lookahead != 0 && - lookahead != '<' && - lookahead != '>') - ADVANCE(8); - END_STATE(); - case 8: - ACCEPT_TOKEN(sym_text); - if (lookahead != 0 && - lookahead != '<' && - lookahead != '>') - ADVANCE(8); - END_STATE(); - case 9: - if (lookahead == '\"') - ADVANCE(2); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') - SKIP(9); - if (lookahead != 0 && - lookahead != '\'' && - lookahead != '/' && - (lookahead < '<' || lookahead > '>')) - ADVANCE(5); - END_STATE(); - case 10: - if (lookahead == 0) - ADVANCE(1); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') - SKIP(10); - END_STATE(); - case 11: - if (lookahead == '=') - ADVANCE(4); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') - SKIP(11); - if (lookahead != 0 && - lookahead != '\"' && - lookahead != '\'' && - lookahead != '/' && - (lookahead < '<' || lookahead > '>')) - ADVANCE(5); + ADVANCE(11); END_STATE(); case 12: - if (lookahead == '\"') - ADVANCE(2); - if (lookahead == '\'') - ADVANCE(3); + if (lookahead == 0) + ADVANCE(1); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(12); - if (lookahead != 0 && - lookahead != '/' && - (lookahead < '<' || lookahead > '>')) - ADVANCE(5); END_STATE(); case 13: + if (lookahead == '=') + ADVANCE(5); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(13); + if (lookahead != 0 && + lookahead != '\"' && + lookahead != '\'' && + lookahead != '/' && + (lookahead < '<' || lookahead > '>')) + ADVANCE(11); + END_STATE(); + case 14: + if (lookahead == '\"') + ADVANCE(2); if (lookahead == '\'') ADVANCE(3); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') - ADVANCE(14); - if (lookahead != 0) - ADVANCE(15); + SKIP(14); + if (lookahead != 0 && + (lookahead < '<' || lookahead > '>')) + ADVANCE(4); END_STATE(); - case 14: + case 15: + if (lookahead == '\'') + ADVANCE(3); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(16); + if (lookahead != 0) + ADVANCE(17); + END_STATE(); + case 16: ACCEPT_TOKEN(aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') - ADVANCE(14); + ADVANCE(16); if (lookahead != 0 && lookahead != '\'') - ADVANCE(15); + ADVANCE(17); END_STATE(); - case 15: + case 17: ACCEPT_TOKEN(aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH); if (lookahead != 0 && lookahead != '\'') - ADVANCE(15); + ADVANCE(17); END_STATE(); - case 16: + case 18: if (lookahead == '\"') ADVANCE(2); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') - ADVANCE(17); + ADVANCE(19); if (lookahead != 0) - ADVANCE(18); + ADVANCE(20); END_STATE(); - case 17: + case 19: ACCEPT_TOKEN(aux_sym_SLASH_LBRACK_CARET_DQUOTE_RBRACK_PLUS_SLASH); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') - ADVANCE(17); + ADVANCE(19); if (lookahead != 0 && lookahead != '\"') - ADVANCE(18); + ADVANCE(20); END_STATE(); - case 18: + case 20: ACCEPT_TOKEN(aux_sym_SLASH_LBRACK_CARET_DQUOTE_RBRACK_PLUS_SLASH); if (lookahead != 0 && lookahead != '\"') - ADVANCE(18); + ADVANCE(20); + END_STATE(); + case 21: + if (lookahead == '\'') + ADVANCE(3); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(21); + END_STATE(); + case 22: + if (lookahead == '\"') + ADVANCE(2); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(22); END_STATE(); default: return false; @@ -377,44 +419,41 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0, .external_lex_state = 1}, - [1] = {.lex_state = 6, .external_lex_state = 2}, - [2] = {.lex_state = 9, .external_lex_state = 3}, - [3] = {.lex_state = 6, .external_lex_state = 2}, - [4] = {.lex_state = 10}, - [5] = {.lex_state = 6, .external_lex_state = 4}, - [6] = {.lex_state = 6, .external_lex_state = 2}, - [7] = {.lex_state = 6, .external_lex_state = 2}, - [8] = {.lex_state = 6, .external_lex_state = 4}, - [9] = {.lex_state = 6, .external_lex_state = 2}, - [10] = {.lex_state = 11, .external_lex_state = 3}, - [11] = {.lex_state = 9, .external_lex_state = 3}, - [12] = {.lex_state = 9, .external_lex_state = 3}, - [13] = {.lex_state = 6, .external_lex_state = 2}, - [14] = {.lex_state = 6, .external_lex_state = 4}, - [15] = {.lex_state = 6, .external_lex_state = 4}, - [16] = {.lex_state = 6, .external_lex_state = 4}, - [17] = {.lex_state = 6, .external_lex_state = 4}, - [18] = {.lex_state = 6, .external_lex_state = 2}, - [19] = {.lex_state = 12}, - [20] = {.lex_state = 6, .external_lex_state = 4}, - [21] = {.lex_state = 6, .external_lex_state = 2}, - [22] = {.lex_state = 9, .external_lex_state = 3}, - [23] = {.lex_state = 6, .external_lex_state = 4}, - [24] = {.lex_state = 9, .external_lex_state = 3}, - [25] = {.lex_state = 6, .external_lex_state = 4}, - [26] = {.lex_state = 6, .external_lex_state = 4}, - [27] = {.lex_state = 6, .external_lex_state = 2}, - [28] = {.lex_state = 6, .external_lex_state = 4}, - [29] = {.lex_state = 9, .external_lex_state = 3}, - [30] = {.lex_state = 13}, - [31] = {.lex_state = 16}, - [32] = {.lex_state = 9, .external_lex_state = 3}, - [33] = {.lex_state = 6, .external_lex_state = 4}, - [34] = {.lex_state = 6, .external_lex_state = 4}, - [35] = {.lex_state = 9, .external_lex_state = 3}, - [36] = {.lex_state = 12}, - [37] = {.lex_state = 9}, - [38] = {.lex_state = 9, .external_lex_state = 3}, + [1] = {.lex_state = 7, .external_lex_state = 2}, + [2] = {.lex_state = 10, .external_lex_state = 3}, + [3] = {.lex_state = 12, .external_lex_state = 4}, + [4] = {.lex_state = 7, .external_lex_state = 5}, + [5] = {.lex_state = 7, .external_lex_state = 2}, + [6] = {.lex_state = 7, .external_lex_state = 2}, + [7] = {.lex_state = 7, .external_lex_state = 5}, + [8] = {.lex_state = 7, .external_lex_state = 2}, + [9] = {.lex_state = 13, .external_lex_state = 3}, + [10] = {.lex_state = 10, .external_lex_state = 3}, + [11] = {.lex_state = 10, .external_lex_state = 3}, + [12] = {.lex_state = 7, .external_lex_state = 2}, + [13] = {.lex_state = 7, .external_lex_state = 5}, + [14] = {.lex_state = 7, .external_lex_state = 5}, + [15] = {.lex_state = 7, .external_lex_state = 5}, + [16] = {.lex_state = 7, .external_lex_state = 2}, + [17] = {.lex_state = 14, .external_lex_state = 4}, + [18] = {.lex_state = 7, .external_lex_state = 5}, + [19] = {.lex_state = 7, .external_lex_state = 2}, + [20] = {.lex_state = 10, .external_lex_state = 3}, + [21] = {.lex_state = 7, .external_lex_state = 5}, + [22] = {.lex_state = 10, .external_lex_state = 3}, + [23] = {.lex_state = 7, .external_lex_state = 5}, + [24] = {.lex_state = 7, .external_lex_state = 5}, + [25] = {.lex_state = 7, .external_lex_state = 2}, + [26] = {.lex_state = 7, .external_lex_state = 5}, + [27] = {.lex_state = 10, .external_lex_state = 3}, + [28] = {.lex_state = 15, .external_lex_state = 4}, + [29] = {.lex_state = 18, .external_lex_state = 4}, + [30] = {.lex_state = 7, .external_lex_state = 5}, + [31] = {.lex_state = 7, .external_lex_state = 5}, + [32] = {.lex_state = 10, .external_lex_state = 3}, + [33] = {.lex_state = 21, .external_lex_state = 4}, + [34] = {.lex_state = 22, .external_lex_state = 4}, + [35] = {.lex_state = 10, .external_lex_state = 3}, }; enum { @@ -424,6 +463,7 @@ enum { ts_external_token_end_tag, ts_external_token__implicit_end_tag, ts_external_token__erroneous_end_tag, + ts_external_token_comment, }; static TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = { @@ -433,9 +473,10 @@ static TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = { [ts_external_token_end_tag] = sym_end_tag, [ts_external_token__implicit_end_tag] = sym__implicit_end_tag, [ts_external_token__erroneous_end_tag] = sym__erroneous_end_tag, + [ts_external_token_comment] = sym_comment, }; -static bool ts_external_scanner_states[5][EXTERNAL_TOKEN_COUNT] = { +static bool ts_external_scanner_states[6][EXTERNAL_TOKEN_COUNT] = { [1] = { [ts_external_token__open_start_tag] = true, [ts_external_token__close_start_tag] = true, @@ -443,20 +484,27 @@ static bool ts_external_scanner_states[5][EXTERNAL_TOKEN_COUNT] = { [ts_external_token_end_tag] = true, [ts_external_token__implicit_end_tag] = true, [ts_external_token__erroneous_end_tag] = true, + [ts_external_token_comment] = true, }, [2] = { [ts_external_token__open_start_tag] = true, [ts_external_token__erroneous_end_tag] = true, + [ts_external_token_comment] = true, }, [3] = { [ts_external_token__close_start_tag] = true, [ts_external_token__self_close_start_tag] = true, + [ts_external_token_comment] = true, }, [4] = { + [ts_external_token_comment] = true, + }, + [5] = { [ts_external_token__open_start_tag] = true, [ts_external_token_end_tag] = true, [ts_external_token__implicit_end_tag] = true, [ts_external_token__erroneous_end_tag] = true, + [ts_external_token_comment] = true, }, }; @@ -468,333 +516,355 @@ static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = { [sym_end_tag] = ACTIONS(1), [sym__implicit_end_tag] = ACTIONS(1), [sym__erroneous_end_tag] = ACTIONS(1), + [sym_comment] = ACTIONS(1), [ts_builtin_sym_end] = ACTIONS(1), [anon_sym_EQ] = ACTIONS(1), - [sym__attribute_part] = ACTIONS(1), + [sym_attribute_name] = ACTIONS(3), + [sym_attribute_value] = ACTIONS(3), [anon_sym_SQUOTE] = ACTIONS(1), [anon_sym_DQUOTE] = ACTIONS(1), }, [1] = { - [sym_fragment] = STATE(4), - [sym__node] = STATE(7), - [sym_element] = STATE(3), - [sym_start_tag] = STATE(5), - [sym_self_closing_tag] = STATE(6), - [aux_sym_fragment_repeat1] = STATE(7), - [sym__open_start_tag] = ACTIONS(3), - [sym__erroneous_end_tag] = ACTIONS(5), - [ts_builtin_sym_end] = ACTIONS(7), - [sym_text] = ACTIONS(5), + [sym_fragment] = STATE(3), + [sym__node] = STATE(6), + [sym_element] = STATE(6), + [sym_start_tag] = STATE(4), + [sym_self_closing_tag] = STATE(5), + [aux_sym_fragment_repeat1] = STATE(6), + [sym__open_start_tag] = ACTIONS(5), + [sym__erroneous_end_tag] = ACTIONS(7), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(11), + [sym_text] = ACTIONS(7), }, [2] = { - [sym_attribute] = STATE(11), - [aux_sym_start_tag_repeat1] = STATE(11), - [sym__close_start_tag] = ACTIONS(9), - [sym__self_close_start_tag] = ACTIONS(11), - [sym__attribute_part] = ACTIONS(13), + [sym_attribute] = STATE(10), + [aux_sym_start_tag_repeat1] = STATE(10), + [sym__close_start_tag] = ACTIONS(13), + [sym__self_close_start_tag] = ACTIONS(15), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(17), }, [3] = { - [sym__open_start_tag] = ACTIONS(15), - [sym__erroneous_end_tag] = ACTIONS(15), - [ts_builtin_sym_end] = ACTIONS(15), - [sym_text] = ACTIONS(15), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(19), }, [4] = { - [ts_builtin_sym_end] = ACTIONS(17), - }, - [5] = { - [sym__node] = STATE(17), - [sym_element] = STATE(14), - [sym_start_tag] = STATE(15), - [sym_self_closing_tag] = STATE(16), - [aux_sym_fragment_repeat1] = STATE(17), - [sym__open_start_tag] = ACTIONS(19), - [sym_end_tag] = ACTIONS(21), - [sym__implicit_end_tag] = ACTIONS(21), - [sym__erroneous_end_tag] = ACTIONS(23), - [sym_text] = ACTIONS(23), - }, - [6] = { - [sym__open_start_tag] = ACTIONS(25), + [sym__node] = STATE(15), + [sym_element] = STATE(15), + [sym_start_tag] = STATE(13), + [sym_self_closing_tag] = STATE(14), + [aux_sym_fragment_repeat1] = STATE(15), + [sym__open_start_tag] = ACTIONS(21), + [sym_end_tag] = ACTIONS(23), + [sym__implicit_end_tag] = ACTIONS(23), [sym__erroneous_end_tag] = ACTIONS(25), - [ts_builtin_sym_end] = ACTIONS(25), + [sym_comment] = ACTIONS(9), [sym_text] = ACTIONS(25), }, - [7] = { - [sym__node] = STATE(18), - [sym_element] = STATE(3), - [sym_start_tag] = STATE(5), - [sym_self_closing_tag] = STATE(6), - [aux_sym_fragment_repeat1] = STATE(18), - [sym__open_start_tag] = ACTIONS(3), - [sym__erroneous_end_tag] = ACTIONS(5), + [5] = { + [sym__open_start_tag] = ACTIONS(27), + [sym__erroneous_end_tag] = ACTIONS(27), + [sym_comment] = ACTIONS(9), [ts_builtin_sym_end] = ACTIONS(27), - [sym_text] = ACTIONS(5), + [sym_text] = ACTIONS(27), }, - [8] = { - [sym__open_start_tag] = ACTIONS(29), - [sym_end_tag] = ACTIONS(29), - [sym__implicit_end_tag] = ACTIONS(29), + [6] = { + [sym__node] = STATE(16), + [sym_element] = STATE(16), + [sym_start_tag] = STATE(4), + [sym_self_closing_tag] = STATE(5), + [aux_sym_fragment_repeat1] = STATE(16), + [sym__open_start_tag] = ACTIONS(5), [sym__erroneous_end_tag] = ACTIONS(29), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(31), [sym_text] = ACTIONS(29), }, + [7] = { + [sym__open_start_tag] = ACTIONS(33), + [sym_end_tag] = ACTIONS(33), + [sym__implicit_end_tag] = ACTIONS(33), + [sym__erroneous_end_tag] = ACTIONS(33), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(33), + }, + [8] = { + [sym__open_start_tag] = ACTIONS(35), + [sym__erroneous_end_tag] = ACTIONS(35), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(35), + [sym_text] = ACTIONS(35), + }, [9] = { - [sym__open_start_tag] = ACTIONS(31), - [sym__erroneous_end_tag] = ACTIONS(31), - [ts_builtin_sym_end] = ACTIONS(31), - [sym_text] = ACTIONS(31), + [sym__close_start_tag] = ACTIONS(37), + [sym__self_close_start_tag] = ACTIONS(37), + [sym_comment] = ACTIONS(9), + [anon_sym_EQ] = ACTIONS(39), + [sym_attribute_name] = ACTIONS(37), }, [10] = { - [sym__close_start_tag] = ACTIONS(33), - [sym__self_close_start_tag] = ACTIONS(33), - [anon_sym_EQ] = ACTIONS(35), - [sym__attribute_part] = ACTIONS(33), + [sym_attribute] = STATE(20), + [aux_sym_start_tag_repeat1] = STATE(20), + [sym__close_start_tag] = ACTIONS(41), + [sym__self_close_start_tag] = ACTIONS(43), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(17), }, [11] = { [sym_attribute] = STATE(22), [aux_sym_start_tag_repeat1] = STATE(22), - [sym__close_start_tag] = ACTIONS(37), - [sym__self_close_start_tag] = ACTIONS(39), - [sym__attribute_part] = ACTIONS(13), + [sym__close_start_tag] = ACTIONS(13), + [sym__self_close_start_tag] = ACTIONS(45), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(17), }, [12] = { - [sym_attribute] = STATE(24), - [aux_sym_start_tag_repeat1] = STATE(24), - [sym__close_start_tag] = ACTIONS(9), - [sym__self_close_start_tag] = ACTIONS(41), - [sym__attribute_part] = ACTIONS(13), + [sym__open_start_tag] = ACTIONS(47), + [sym__erroneous_end_tag] = ACTIONS(47), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(47), + [sym_text] = ACTIONS(47), }, [13] = { - [sym__open_start_tag] = ACTIONS(43), - [sym__erroneous_end_tag] = ACTIONS(43), - [ts_builtin_sym_end] = ACTIONS(43), - [sym_text] = ACTIONS(43), + [sym__node] = STATE(24), + [sym_element] = STATE(24), + [sym_start_tag] = STATE(13), + [sym_self_closing_tag] = STATE(14), + [aux_sym_fragment_repeat1] = STATE(24), + [sym__open_start_tag] = ACTIONS(21), + [sym_end_tag] = ACTIONS(49), + [sym__implicit_end_tag] = ACTIONS(49), + [sym__erroneous_end_tag] = ACTIONS(51), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(51), }, [14] = { - [sym__open_start_tag] = ACTIONS(15), - [sym_end_tag] = ACTIONS(15), - [sym__implicit_end_tag] = ACTIONS(15), - [sym__erroneous_end_tag] = ACTIONS(15), - [sym_text] = ACTIONS(15), + [sym__open_start_tag] = ACTIONS(27), + [sym_end_tag] = ACTIONS(27), + [sym__implicit_end_tag] = ACTIONS(27), + [sym__erroneous_end_tag] = ACTIONS(27), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(27), }, [15] = { [sym__node] = STATE(26), - [sym_element] = STATE(14), - [sym_start_tag] = STATE(15), - [sym_self_closing_tag] = STATE(16), + [sym_element] = STATE(26), + [sym_start_tag] = STATE(13), + [sym_self_closing_tag] = STATE(14), [aux_sym_fragment_repeat1] = STATE(26), - [sym__open_start_tag] = ACTIONS(19), - [sym_end_tag] = ACTIONS(45), - [sym__implicit_end_tag] = ACTIONS(45), - [sym__erroneous_end_tag] = ACTIONS(23), - [sym_text] = ACTIONS(23), + [sym__open_start_tag] = ACTIONS(21), + [sym_end_tag] = ACTIONS(53), + [sym__implicit_end_tag] = ACTIONS(53), + [sym__erroneous_end_tag] = ACTIONS(55), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(55), }, [16] = { - [sym__open_start_tag] = ACTIONS(25), - [sym_end_tag] = ACTIONS(25), - [sym__implicit_end_tag] = ACTIONS(25), - [sym__erroneous_end_tag] = ACTIONS(25), - [sym_text] = ACTIONS(25), + [sym__node] = STATE(16), + [sym_element] = STATE(16), + [sym_start_tag] = STATE(4), + [sym_self_closing_tag] = STATE(5), + [aux_sym_fragment_repeat1] = STATE(16), + [sym__open_start_tag] = ACTIONS(57), + [sym__erroneous_end_tag] = ACTIONS(60), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(63), + [sym_text] = ACTIONS(60), }, [17] = { - [sym__node] = STATE(28), - [sym_element] = STATE(14), - [sym_start_tag] = STATE(15), - [sym_self_closing_tag] = STATE(16), - [aux_sym_fragment_repeat1] = STATE(28), - [sym__open_start_tag] = ACTIONS(19), - [sym_end_tag] = ACTIONS(47), - [sym__implicit_end_tag] = ACTIONS(47), - [sym__erroneous_end_tag] = ACTIONS(23), - [sym_text] = ACTIONS(23), + [sym_quoted_attribute_value] = STATE(27), + [sym_comment] = ACTIONS(9), + [sym_attribute_value] = ACTIONS(65), + [anon_sym_SQUOTE] = ACTIONS(67), + [anon_sym_DQUOTE] = ACTIONS(69), }, [18] = { - [sym__node] = STATE(18), - [sym_element] = STATE(3), - [sym_start_tag] = STATE(5), - [sym_self_closing_tag] = STATE(6), - [aux_sym_fragment_repeat1] = STATE(18), - [sym__open_start_tag] = ACTIONS(49), - [sym__erroneous_end_tag] = ACTIONS(52), - [ts_builtin_sym_end] = ACTIONS(55), - [sym_text] = ACTIONS(52), + [sym__open_start_tag] = ACTIONS(71), + [sym_end_tag] = ACTIONS(71), + [sym__implicit_end_tag] = ACTIONS(71), + [sym__erroneous_end_tag] = ACTIONS(71), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(71), }, [19] = { - [sym_quoted_attribute_value] = STATE(32), - [sym__attribute_part] = ACTIONS(57), - [anon_sym_SQUOTE] = ACTIONS(59), - [anon_sym_DQUOTE] = ACTIONS(61), + [sym__open_start_tag] = ACTIONS(73), + [sym__erroneous_end_tag] = ACTIONS(73), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(73), + [sym_text] = ACTIONS(73), }, [20] = { - [sym__open_start_tag] = ACTIONS(63), - [sym_end_tag] = ACTIONS(63), - [sym__implicit_end_tag] = ACTIONS(63), - [sym__erroneous_end_tag] = ACTIONS(63), - [sym_text] = ACTIONS(63), + [sym_attribute] = STATE(20), + [aux_sym_start_tag_repeat1] = STATE(20), + [sym__close_start_tag] = ACTIONS(75), + [sym__self_close_start_tag] = ACTIONS(75), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(77), }, [21] = { - [sym__open_start_tag] = ACTIONS(65), - [sym__erroneous_end_tag] = ACTIONS(65), - [ts_builtin_sym_end] = ACTIONS(65), - [sym_text] = ACTIONS(65), + [sym__open_start_tag] = ACTIONS(35), + [sym_end_tag] = ACTIONS(35), + [sym__implicit_end_tag] = ACTIONS(35), + [sym__erroneous_end_tag] = ACTIONS(35), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(35), }, [22] = { - [sym_attribute] = STATE(22), - [aux_sym_start_tag_repeat1] = STATE(22), - [sym__close_start_tag] = ACTIONS(67), - [sym__self_close_start_tag] = ACTIONS(67), - [sym__attribute_part] = ACTIONS(69), + [sym_attribute] = STATE(20), + [aux_sym_start_tag_repeat1] = STATE(20), + [sym__close_start_tag] = ACTIONS(41), + [sym__self_close_start_tag] = ACTIONS(80), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(17), }, [23] = { - [sym__open_start_tag] = ACTIONS(31), - [sym_end_tag] = ACTIONS(31), - [sym__implicit_end_tag] = ACTIONS(31), - [sym__erroneous_end_tag] = ACTIONS(31), - [sym_text] = ACTIONS(31), + [sym__open_start_tag] = ACTIONS(47), + [sym_end_tag] = ACTIONS(47), + [sym__implicit_end_tag] = ACTIONS(47), + [sym__erroneous_end_tag] = ACTIONS(47), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(47), }, [24] = { - [sym_attribute] = STATE(22), - [aux_sym_start_tag_repeat1] = STATE(22), - [sym__close_start_tag] = ACTIONS(37), - [sym__self_close_start_tag] = ACTIONS(72), - [sym__attribute_part] = ACTIONS(13), + [sym__node] = STATE(26), + [sym_element] = STATE(26), + [sym_start_tag] = STATE(13), + [sym_self_closing_tag] = STATE(14), + [aux_sym_fragment_repeat1] = STATE(26), + [sym__open_start_tag] = ACTIONS(21), + [sym_end_tag] = ACTIONS(82), + [sym__implicit_end_tag] = ACTIONS(82), + [sym__erroneous_end_tag] = ACTIONS(55), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(55), }, [25] = { - [sym__open_start_tag] = ACTIONS(43), - [sym_end_tag] = ACTIONS(43), - [sym__implicit_end_tag] = ACTIONS(43), - [sym__erroneous_end_tag] = ACTIONS(43), - [sym_text] = ACTIONS(43), + [sym__open_start_tag] = ACTIONS(84), + [sym__erroneous_end_tag] = ACTIONS(84), + [sym_comment] = ACTIONS(9), + [ts_builtin_sym_end] = ACTIONS(84), + [sym_text] = ACTIONS(84), }, [26] = { - [sym__node] = STATE(28), - [sym_element] = STATE(14), - [sym_start_tag] = STATE(15), - [sym_self_closing_tag] = STATE(16), - [aux_sym_fragment_repeat1] = STATE(28), - [sym__open_start_tag] = ACTIONS(19), - [sym_end_tag] = ACTIONS(74), - [sym__implicit_end_tag] = ACTIONS(74), - [sym__erroneous_end_tag] = ACTIONS(23), - [sym_text] = ACTIONS(23), + [sym__node] = STATE(26), + [sym_element] = STATE(26), + [sym_start_tag] = STATE(13), + [sym_self_closing_tag] = STATE(14), + [aux_sym_fragment_repeat1] = STATE(26), + [sym__open_start_tag] = ACTIONS(86), + [sym_end_tag] = ACTIONS(63), + [sym__implicit_end_tag] = ACTIONS(63), + [sym__erroneous_end_tag] = ACTIONS(89), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(89), }, [27] = { - [sym__open_start_tag] = ACTIONS(76), - [sym__erroneous_end_tag] = ACTIONS(76), - [ts_builtin_sym_end] = ACTIONS(76), - [sym_text] = ACTIONS(76), - }, - [28] = { - [sym__node] = STATE(28), - [sym_element] = STATE(14), - [sym_start_tag] = STATE(15), - [sym_self_closing_tag] = STATE(16), - [aux_sym_fragment_repeat1] = STATE(28), - [sym__open_start_tag] = ACTIONS(78), - [sym_end_tag] = ACTIONS(55), - [sym__implicit_end_tag] = ACTIONS(55), - [sym__erroneous_end_tag] = ACTIONS(81), - [sym_text] = ACTIONS(81), - }, - [29] = { - [sym__close_start_tag] = ACTIONS(84), - [sym__self_close_start_tag] = ACTIONS(84), - [sym__attribute_part] = ACTIONS(84), - }, - [30] = { - [anon_sym_SQUOTE] = ACTIONS(86), - [aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH] = ACTIONS(88), - }, - [31] = { - [anon_sym_DQUOTE] = ACTIONS(86), - [aux_sym_SLASH_LBRACK_CARET_DQUOTE_RBRACK_PLUS_SLASH] = ACTIONS(90), - }, - [32] = { [sym__close_start_tag] = ACTIONS(92), [sym__self_close_start_tag] = ACTIONS(92), - [sym__attribute_part] = ACTIONS(92), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(92), + }, + [28] = { + [sym_comment] = ACTIONS(9), + [anon_sym_SQUOTE] = ACTIONS(94), + [aux_sym_SLASH_LBRACK_CARET_SQUOTE_RBRACK_PLUS_SLASH] = ACTIONS(96), + }, + [29] = { + [sym_comment] = ACTIONS(9), + [anon_sym_DQUOTE] = ACTIONS(94), + [aux_sym_SLASH_LBRACK_CARET_DQUOTE_RBRACK_PLUS_SLASH] = ACTIONS(98), + }, + [30] = { + [sym__open_start_tag] = ACTIONS(73), + [sym_end_tag] = ACTIONS(73), + [sym__implicit_end_tag] = ACTIONS(73), + [sym__erroneous_end_tag] = ACTIONS(73), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(73), + }, + [31] = { + [sym__open_start_tag] = ACTIONS(84), + [sym_end_tag] = ACTIONS(84), + [sym__implicit_end_tag] = ACTIONS(84), + [sym__erroneous_end_tag] = ACTIONS(84), + [sym_comment] = ACTIONS(9), + [sym_text] = ACTIONS(84), + }, + [32] = { + [sym__close_start_tag] = ACTIONS(100), + [sym__self_close_start_tag] = ACTIONS(100), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(100), }, [33] = { - [sym__open_start_tag] = ACTIONS(65), - [sym_end_tag] = ACTIONS(65), - [sym__implicit_end_tag] = ACTIONS(65), - [sym__erroneous_end_tag] = ACTIONS(65), - [sym_text] = ACTIONS(65), + [sym_comment] = ACTIONS(9), + [anon_sym_SQUOTE] = ACTIONS(102), }, [34] = { - [sym__open_start_tag] = ACTIONS(76), - [sym_end_tag] = ACTIONS(76), - [sym__implicit_end_tag] = ACTIONS(76), - [sym__erroneous_end_tag] = ACTIONS(76), - [sym_text] = ACTIONS(76), + [sym_comment] = ACTIONS(9), + [anon_sym_DQUOTE] = ACTIONS(102), }, [35] = { - [sym__close_start_tag] = ACTIONS(94), - [sym__self_close_start_tag] = ACTIONS(94), - [sym__attribute_part] = ACTIONS(94), - }, - [36] = { - [anon_sym_SQUOTE] = ACTIONS(96), - }, - [37] = { - [anon_sym_DQUOTE] = ACTIONS(96), - }, - [38] = { - [sym__close_start_tag] = ACTIONS(98), - [sym__self_close_start_tag] = ACTIONS(98), - [sym__attribute_part] = ACTIONS(98), + [sym__close_start_tag] = ACTIONS(104), + [sym__self_close_start_tag] = ACTIONS(104), + [sym_comment] = ACTIONS(9), + [sym_attribute_name] = ACTIONS(104), }, }; static TSParseActionEntry ts_parse_actions[] = { [0] = {.count = 0, .reusable = false}, [1] = {.count = 1, .reusable = true}, RECOVER(), - [3] = {.count = 1, .reusable = true}, SHIFT(2), - [5] = {.count = 1, .reusable = true}, SHIFT(3), - [7] = {.count = 1, .reusable = true}, REDUCE(sym_fragment, 0), - [9] = {.count = 1, .reusable = true}, SHIFT(8), - [11] = {.count = 1, .reusable = true}, SHIFT(9), - [13] = {.count = 1, .reusable = true}, SHIFT(10), - [15] = {.count = 1, .reusable = true}, REDUCE(sym__node, 1), - [17] = {.count = 1, .reusable = true}, ACCEPT_INPUT(), - [19] = {.count = 1, .reusable = true}, SHIFT(12), - [21] = {.count = 1, .reusable = true}, SHIFT(13), - [23] = {.count = 1, .reusable = true}, SHIFT(14), - [25] = {.count = 1, .reusable = true}, REDUCE(sym_element, 1), - [27] = {.count = 1, .reusable = true}, REDUCE(sym_fragment, 1), - [29] = {.count = 1, .reusable = true}, REDUCE(sym_start_tag, 2), - [31] = {.count = 1, .reusable = true}, REDUCE(sym_self_closing_tag, 2), - [33] = {.count = 1, .reusable = true}, REDUCE(sym_attribute, 1, .alias_sequence_id = 1), - [35] = {.count = 1, .reusable = true}, SHIFT(19), - [37] = {.count = 1, .reusable = true}, SHIFT(20), - [39] = {.count = 1, .reusable = true}, SHIFT(21), - [41] = {.count = 1, .reusable = true}, SHIFT(23), - [43] = {.count = 1, .reusable = true}, REDUCE(sym_element, 2), - [45] = {.count = 1, .reusable = true}, SHIFT(25), - [47] = {.count = 1, .reusable = true}, SHIFT(27), - [49] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(2), - [52] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(3), - [55] = {.count = 1, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), - [57] = {.count = 1, .reusable = true}, SHIFT(29), - [59] = {.count = 1, .reusable = true}, SHIFT(30), - [61] = {.count = 1, .reusable = true}, SHIFT(31), - [63] = {.count = 1, .reusable = true}, REDUCE(sym_start_tag, 3), - [65] = {.count = 1, .reusable = true}, REDUCE(sym_self_closing_tag, 3), - [67] = {.count = 1, .reusable = true}, REDUCE(aux_sym_start_tag_repeat1, 2), - [69] = {.count = 2, .reusable = true}, REDUCE(aux_sym_start_tag_repeat1, 2), SHIFT_REPEAT(10), - [72] = {.count = 1, .reusable = true}, SHIFT(33), - [74] = {.count = 1, .reusable = true}, SHIFT(34), - [76] = {.count = 1, .reusable = true}, REDUCE(sym_element, 3), - [78] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(12), - [81] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(14), - [84] = {.count = 1, .reusable = true}, REDUCE(sym_attribute, 3, .alias_sequence_id = 2), - [86] = {.count = 1, .reusable = false}, SHIFT(35), - [88] = {.count = 1, .reusable = true}, SHIFT(36), - [90] = {.count = 1, .reusable = true}, SHIFT(37), - [92] = {.count = 1, .reusable = true}, REDUCE(sym_attribute, 3, .alias_sequence_id = 1), - [94] = {.count = 1, .reusable = true}, REDUCE(sym_quoted_attribute_value, 2), - [96] = {.count = 1, .reusable = true}, SHIFT(38), - [98] = {.count = 1, .reusable = true}, REDUCE(sym_quoted_attribute_value, 3, .alias_sequence_id = 3), + [3] = {.count = 1, .reusable = false}, RECOVER(), + [5] = {.count = 1, .reusable = true}, SHIFT(2), + [7] = {.count = 1, .reusable = true}, SHIFT(6), + [9] = {.count = 1, .reusable = true}, SHIFT_EXTRA(), + [11] = {.count = 1, .reusable = true}, REDUCE(sym_fragment, 0), + [13] = {.count = 1, .reusable = true}, SHIFT(7), + [15] = {.count = 1, .reusable = true}, SHIFT(8), + [17] = {.count = 1, .reusable = true}, SHIFT(9), + [19] = {.count = 1, .reusable = true}, ACCEPT_INPUT(), + [21] = {.count = 1, .reusable = true}, SHIFT(11), + [23] = {.count = 1, .reusable = true}, SHIFT(12), + [25] = {.count = 1, .reusable = true}, SHIFT(15), + [27] = {.count = 1, .reusable = true}, REDUCE(sym_element, 1), + [29] = {.count = 1, .reusable = true}, SHIFT(16), + [31] = {.count = 1, .reusable = true}, REDUCE(sym_fragment, 1), + [33] = {.count = 1, .reusable = true}, REDUCE(sym_start_tag, 2), + [35] = {.count = 1, .reusable = true}, REDUCE(sym_self_closing_tag, 2), + [37] = {.count = 1, .reusable = true}, REDUCE(sym_attribute, 1), + [39] = {.count = 1, .reusable = true}, SHIFT(17), + [41] = {.count = 1, .reusable = true}, SHIFT(18), + [43] = {.count = 1, .reusable = true}, SHIFT(19), + [45] = {.count = 1, .reusable = true}, SHIFT(21), + [47] = {.count = 1, .reusable = true}, REDUCE(sym_element, 2), + [49] = {.count = 1, .reusable = true}, SHIFT(23), + [51] = {.count = 1, .reusable = true}, SHIFT(24), + [53] = {.count = 1, .reusable = true}, SHIFT(25), + [55] = {.count = 1, .reusable = true}, SHIFT(26), + [57] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(2), + [60] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(16), + [63] = {.count = 1, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), + [65] = {.count = 1, .reusable = true}, SHIFT(27), + [67] = {.count = 1, .reusable = true}, SHIFT(28), + [69] = {.count = 1, .reusable = true}, SHIFT(29), + [71] = {.count = 1, .reusable = true}, REDUCE(sym_start_tag, 3), + [73] = {.count = 1, .reusable = true}, REDUCE(sym_self_closing_tag, 3), + [75] = {.count = 1, .reusable = true}, REDUCE(aux_sym_start_tag_repeat1, 2), + [77] = {.count = 2, .reusable = true}, REDUCE(aux_sym_start_tag_repeat1, 2), SHIFT_REPEAT(9), + [80] = {.count = 1, .reusable = true}, SHIFT(30), + [82] = {.count = 1, .reusable = true}, SHIFT(31), + [84] = {.count = 1, .reusable = true}, REDUCE(sym_element, 3), + [86] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(11), + [89] = {.count = 2, .reusable = true}, REDUCE(aux_sym_fragment_repeat1, 2), SHIFT_REPEAT(26), + [92] = {.count = 1, .reusable = true}, REDUCE(sym_attribute, 3), + [94] = {.count = 1, .reusable = false}, SHIFT(32), + [96] = {.count = 1, .reusable = true}, SHIFT(33), + [98] = {.count = 1, .reusable = true}, SHIFT(34), + [100] = {.count = 1, .reusable = true}, REDUCE(sym_quoted_attribute_value, 2), + [102] = {.count = 1, .reusable = true}, SHIFT(35), + [104] = {.count = 1, .reusable = true}, REDUCE(sym_quoted_attribute_value, 3, .alias_sequence_id = 1), }; void *tree_sitter_html_external_scanner_create(); diff --git a/src/scanner.cc b/src/scanner.cc index 008b856..5e3e0ad 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -18,6 +18,7 @@ enum TokenType { END_TAG, IMPLICIT_END_TAG, ERRONEOUS_END_TAG, + COMMENT, }; struct Scanner { @@ -66,7 +67,43 @@ struct Scanner { return tag_name; } + bool comment(TSLexer *lexer) { + if (lexer->lookahead != '-') return false; + lexer->advance(lexer, false); + if (lexer->lookahead != '-') return false; + lexer->advance(lexer, false); + + unsigned dashes = 0; + auto c = lexer->lookahead; + while (c) { + switch (c) { + case '-': + ++dashes; + break; + case '>': + if (dashes >= 2) { + lexer->result_symbol = COMMENT; + lexer->advance(lexer, false); + lexer->mark_end(lexer); + return true; + } + break; + default: + dashes = 0; + } + lexer->advance(lexer, false); + c = lexer->lookahead; + } + return false; + } + bool start_tag(TSLexer *lexer) { + if (!tags.empty() && tags.back().is_void()) { + tags.pop_back(); + lexer->result_symbol = IMPLICIT_END_TAG; + return true; + } + auto tag_name = scan_tag_name(lexer); if (tag_name.empty()) return false; @@ -115,15 +152,22 @@ struct Scanner { switch (lexer->lookahead) { case '<': - if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) { - lexer->mark_end(lexer); + lexer->mark_end(lexer); + lexer->advance(lexer, false); + + if (lexer->lookahead == '!') { lexer->advance(lexer, false); + return comment(lexer); + } + + if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) { if (lexer->lookahead == '/') { lexer->advance(lexer, false); return end_tag(lexer); } return start_tag(lexer); } + break; case '>': diff --git a/test.html b/test.html deleted file mode 100644 index 3f121b3..0000000 --- a/test.html +++ /dev/null @@ -1 +0,0 @@ -