Parse tag names as separate tokens
This commit is contained in:
parent
4d11a75675
commit
5f2a122de7
147
corpus/main.txt
147
corpus/main.txt
|
@ -6,9 +6,9 @@ Tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
===================================
|
||||
Tags with attributes
|
||||
|
@ -19,6 +19,7 @@ Tags with attributes
|
|||
(fragment
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute
|
||||
(attribute_name)
|
||||
(attribute_value))
|
||||
|
@ -27,7 +28,7 @@ Tags with attributes
|
|||
(quoted_attribute_value (attribute_value)))
|
||||
(attribute
|
||||
(attribute_name)))
|
||||
(end_tag)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
===================================
|
||||
Nested tags
|
||||
|
@ -41,17 +42,19 @@ Nested tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(element
|
||||
(start_tag)
|
||||
(text)
|
||||
(end_tag))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag))
|
||||
(end_tag)))
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Void tags
|
||||
|
@ -61,16 +64,18 @@ Void tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag))
|
||||
(element (start_tag (tag_name)))
|
||||
(element
|
||||
(self_closing_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (attribute_value))
|
||||
(attribute (attribute_name) (attribute_value))))
|
||||
(end_tag)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Comments
|
||||
|
@ -83,15 +88,16 @@ Comments
|
|||
|
||||
(fragment
|
||||
(comment)
|
||||
(text)
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(comment)
|
||||
(end_tag)))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Raw text elements
|
||||
==================================
|
||||
|
||||
<script>
|
||||
</s
|
||||
</sc
|
||||
|
@ -108,11 +114,14 @@ Raw text elements
|
|||
|
||||
(fragment
|
||||
(raw_element
|
||||
(start_tag)
|
||||
(end_tag))
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(raw_element
|
||||
(start_tag)
|
||||
(end_tag))
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name)))
|
||||
(text))
|
||||
|
||||
==================================
|
||||
|
@ -146,10 +155,11 @@ LI elements without close tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(end_tag)))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
DT and DL elements without close tags
|
||||
|
@ -165,13 +175,14 @@ DT and DL elements without close tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(end_tag)))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
P elements without close tags
|
||||
|
@ -184,11 +195,12 @@ P elements without close tags
|
|||
---
|
||||
|
||||
(fragment
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text) (end_tag))
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text) (end_tag)))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
Ruby annotation elements without close tags
|
||||
|
@ -198,17 +210,16 @@ Ruby annotation elements without close tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text))
|
||||
(end_tag)))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=======================================
|
||||
COLGROUP elements without end tags
|
||||
=======================================
|
||||
|
||||
<table>
|
||||
<colgroup>
|
||||
<col style="background-color: #0f0">
|
||||
|
@ -223,18 +234,29 @@ COLGROUP elements without end tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element
|
||||
(start_tag)
|
||||
(element (start_tag (attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag (attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
|
||||
(element
|
||||
(start_tag)
|
||||
(element (start_tag) (text) (end_tag))
|
||||
(element (start_tag) (text) (end_tag))
|
||||
(element (start_tag) (text) (end_tag))
|
||||
(end_tag))
|
||||
(end_tag)))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=========================================
|
||||
TR, TD, and TH elements without end tags
|
||||
|
@ -251,13 +273,16 @@ TR, TD, and TH elements without end tags
|
|||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag)
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element
|
||||
(start_tag)
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text)))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text)))
|
||||
(element
|
||||
(start_tag)
|
||||
(element (start_tag) (text))
|
||||
(element (start_tag) (text)))
|
||||
(end_tag)))
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text)))
|
||||
(end_tag (tag_name))))
|
||||
|
|
44
grammar.js
44
grammar.js
|
@ -7,14 +7,13 @@ module.exports = grammar({
|
|||
],
|
||||
|
||||
externals: $ => [
|
||||
$._open_start_tag,
|
||||
$._open_raw_start_tag,
|
||||
$._close_start_tag,
|
||||
$._self_close_start_tag,
|
||||
$.end_tag,
|
||||
$._start_tag_name,
|
||||
$._start_raw_tag_name,
|
||||
$._end_tag_name,
|
||||
$.erroneous_end_tag_name,
|
||||
'/>',
|
||||
$._implicit_end_tag,
|
||||
$._erroneous_end_tag,
|
||||
$._raw_text,
|
||||
$.raw_text,
|
||||
$.comment,
|
||||
],
|
||||
|
||||
|
@ -31,8 +30,8 @@ module.exports = grammar({
|
|||
_node: $ => choice(
|
||||
$.doctype,
|
||||
$.text,
|
||||
$._erroneous_end_tag,
|
||||
$.element,
|
||||
$.erroneous_end_tag,
|
||||
$.raw_element
|
||||
),
|
||||
|
||||
|
@ -47,26 +46,41 @@ module.exports = grammar({
|
|||
|
||||
raw_element: $ => seq(
|
||||
alias($._raw_start_tag, $.start_tag),
|
||||
optional($._raw_text),
|
||||
optional($.raw_text),
|
||||
$.end_tag
|
||||
),
|
||||
|
||||
start_tag: $ => seq(
|
||||
$._open_start_tag,
|
||||
'<',
|
||||
alias($._start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
$._close_start_tag
|
||||
'>'
|
||||
),
|
||||
|
||||
_raw_start_tag: $ => seq(
|
||||
$._open_raw_start_tag,
|
||||
'<',
|
||||
alias($._start_raw_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
$._close_start_tag
|
||||
'>'
|
||||
),
|
||||
|
||||
self_closing_tag: $ => seq(
|
||||
$._open_start_tag,
|
||||
'<',
|
||||
alias($._start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
$._self_close_start_tag
|
||||
'/>'
|
||||
),
|
||||
|
||||
end_tag: $ => seq(
|
||||
'</',
|
||||
alias($._end_tag_name, $.tag_name),
|
||||
'>'
|
||||
),
|
||||
|
||||
erroneous_end_tag: $ => seq(
|
||||
'</',
|
||||
$.erroneous_end_tag_name,
|
||||
'>'
|
||||
),
|
||||
|
||||
attribute: $ => seq(
|
||||
|
|
|
@ -42,11 +42,11 @@
|
|||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_erroneous_end_tag"
|
||||
"name": "element"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "element"
|
||||
"name": "erroneous_end_tag"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
|
@ -109,7 +109,7 @@
|
|||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_raw_text"
|
||||
"name": "raw_text"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
|
@ -126,8 +126,17 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_open_start_tag"
|
||||
"name": "_start_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
|
@ -137,8 +146,8 @@
|
|||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_close_start_tag"
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -146,8 +155,17 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_open_raw_start_tag"
|
||||
"name": "_start_raw_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
|
@ -157,8 +175,8 @@
|
|||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_close_start_tag"
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -166,8 +184,17 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_open_start_tag"
|
||||
"name": "_start_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
|
@ -177,8 +204,47 @@
|
|||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "/>"
|
||||
}
|
||||
]
|
||||
},
|
||||
"end_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "</"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_self_close_start_tag"
|
||||
"name": "_end_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"erroneous_end_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "</"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "erroneous_end_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -313,23 +379,23 @@
|
|||
"externals": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_open_start_tag"
|
||||
"name": "_start_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_open_raw_start_tag"
|
||||
"name": "_start_raw_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_close_start_tag"
|
||||
"name": "_end_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_self_close_start_tag"
|
||||
"name": "erroneous_end_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "end_tag"
|
||||
"type": "STRING",
|
||||
"value": "/>"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
|
@ -337,11 +403,7 @@
|
|||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_erroneous_end_tag"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_raw_text"
|
||||
"name": "raw_text"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,13 +11,12 @@ using std::vector;
|
|||
using std::string;
|
||||
|
||||
enum TokenType {
|
||||
OPEN_START_TAG,
|
||||
OPEN_RAW_START_TAG,
|
||||
CLOSE_START_TAG,
|
||||
SELF_CLOSE_START_TAG,
|
||||
END_TAG,
|
||||
START_TAG_NAME,
|
||||
START_RAW_TAG_NAME,
|
||||
END_TAG_NAME,
|
||||
ERRONEOUS_END_TAG_NAME,
|
||||
SELF_CLOSING_TAG_DELIMITER,
|
||||
IMPLICIT_END_TAG,
|
||||
ERRONEOUS_END_TAG,
|
||||
RAW_TEXT,
|
||||
COMMENT
|
||||
};
|
||||
|
@ -68,7 +67,7 @@ struct Scanner {
|
|||
return tag_name;
|
||||
}
|
||||
|
||||
bool comment(TSLexer *lexer) {
|
||||
bool scan_comment(TSLexer *lexer) {
|
||||
if (lexer->lookahead != '-') return false;
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead != '-') return false;
|
||||
|
@ -98,7 +97,7 @@ struct Scanner {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool raw_text(TSLexer *lexer) {
|
||||
bool scan_raw_text(TSLexer *lexer) {
|
||||
if (!tags.size()) return false;
|
||||
|
||||
lexer->mark_end(lexer);
|
||||
|
@ -123,69 +122,90 @@ struct Scanner {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool start_tag(TSLexer *lexer) {
|
||||
bool scan_implicit_end_tag(TSLexer *lexer) {
|
||||
Tag *parent = tags.empty() ? nullptr : &tags.back();
|
||||
|
||||
bool is_closing_tag = false;
|
||||
if (lexer->lookahead == '/') {
|
||||
is_closing_tag = true;
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
if (parent && parent->is_void()) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
auto tag_name = scan_tag_name(lexer);
|
||||
if (tag_name.empty()) return false;
|
||||
|
||||
Tag next_tag = Tag::for_name(tag_name);
|
||||
|
||||
if (parent && !parent->can_contain(next_tag)) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
|
||||
tags.push_back(next_tag);
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = next_tag.is_raw() ? OPEN_RAW_START_TAG : OPEN_START_TAG;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool end_tag(TSLexer *lexer) {
|
||||
auto tag_name = scan_tag_name(lexer);
|
||||
if (tag_name.empty()) return false;
|
||||
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
Tag tag = Tag::for_name(tag_name);
|
||||
|
||||
if (is_closing_tag) {
|
||||
// The tag correctly closes the topmost element on the stack
|
||||
if (tag == tags.back()) {
|
||||
tags.pop_back();
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = END_TAG;
|
||||
return true;
|
||||
}
|
||||
if (next_tag == tags.back()) return false;
|
||||
|
||||
// Otherwise, dig deeper and queue implicit end tags (to be nice in
|
||||
// the case of malformed HTML)
|
||||
if (std::find(tags.begin(), tags.end(), tag) != tags.end()) {
|
||||
if (std::find(tags.begin(), tags.end(), next_tag) != tags.end()) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
} else if (parent && !parent->can_contain(next_tag)) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
|
||||
// You closed a tag you never opened 😭
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = ERRONEOUS_END_TAG;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Scans a tag name following '<' and records the corresponding Tag on the
// `tags` stack. The emitted token is START_RAW_TAG_NAME when the tag reports
// is_raw(), and START_TAG_NAME otherwise. Returns false (emitting nothing and
// leaving the stack untouched) when scan_tag_name() yields an empty name.
bool scan_start_tag_name(TSLexer *lexer) {
  auto name = scan_tag_name(lexer);
  if (name.empty()) {
    return false;
  }

  Tag opened = Tag::for_name(name);
  tags.push_back(opened);
  lexer->result_symbol = opened.is_raw() ? START_RAW_TAG_NAME : START_TAG_NAME;
  return true;
}
|
||||
|
||||
// Scans a tag name following '</'. If it matches the tag on top of the `tags`
// stack, that entry is popped and END_TAG_NAME is emitted; in every other case
// (including an empty stack) the stack is left alone and ERRONEOUS_END_TAG_NAME
// is emitted. Returns false only when scan_tag_name() yields an empty name.
bool scan_end_tag_name(TSLexer *lexer) {
  auto name = scan_tag_name(lexer);
  if (name.empty()) {
    return false;
  }

  Tag closing = Tag::for_name(name);
  bool closes_top = !tags.empty() && tags.back() == closing;
  if (closes_top) {
    tags.pop_back();
  }
  lexer->result_symbol = closes_top ? END_TAG_NAME : ERRONEOUS_END_TAG_NAME;
  return true;
}
|
||||
|
||||
// Scans the "/>" delimiter that ends a self-closing tag. The caller dispatches
// here when the lookahead is '/', so the first advance consumes that slash.
// On success the tag recorded for this start tag is removed from the `tags`
// stack and SELF_CLOSING_TAG_DELIMITER is emitted. Returns false when the
// slash is not followed by '>'.
bool scan_self_closing_tag_delimiter(TSLexer *lexer) {
  lexer->advance(lexer, false);
  if (lexer->lookahead != '>') {
    return false;
  }

  lexer->advance(lexer, false);
  // Guard the pop: pop_back() on an empty container is undefined behavior, and
  // the stack can be empty if this token is requested without a start tag name
  // having been recorded first (e.g. on malformed input).
  if (!tags.empty()) {
    tags.pop_back();
  }
  lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;
  return true;
}
|
||||
|
||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (valid_symbols[RAW_TEXT] && !valid_symbols[OPEN_START_TAG] && !valid_symbols[CLOSE_START_TAG]) {
|
||||
return raw_text(lexer);
|
||||
if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) {
|
||||
return scan_raw_text(lexer);
|
||||
}
|
||||
|
||||
switch (lexer->lookahead) {
|
||||
|
@ -195,38 +215,26 @@ struct Scanner {
|
|||
|
||||
if (lexer->lookahead == '!') {
|
||||
lexer->advance(lexer, false);
|
||||
return comment(lexer);
|
||||
return scan_comment(lexer);
|
||||
}
|
||||
|
||||
if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) {
|
||||
if (lexer->lookahead == '/') {
|
||||
lexer->advance(lexer, false);
|
||||
return end_tag(lexer);
|
||||
}
|
||||
return start_tag(lexer);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case '>':
|
||||
if (valid_symbols[CLOSE_START_TAG]) {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->result_symbol = CLOSE_START_TAG;
|
||||
return true;
|
||||
if (valid_symbols[IMPLICIT_END_TAG]) {
|
||||
return scan_implicit_end_tag(lexer);
|
||||
}
|
||||
break;
|
||||
|
||||
case '/':
|
||||
if (valid_symbols[SELF_CLOSE_START_TAG]) {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '>') {
|
||||
lexer->advance(lexer, false);
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = SELF_CLOSE_START_TAG;
|
||||
return true;
|
||||
}
|
||||
if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) {
|
||||
return scan_self_closing_tag_delimiter(lexer);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if (valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME]) {
|
||||
return valid_symbols[START_TAG_NAME]
|
||||
? scan_start_tag_name(lexer)
|
||||
: scan_end_tag_name(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
Loading…
Reference in New Issue