Add comments, allow slashes in unquoted attribute values

Co-Authored-By: Ashi Krishan <queerviolet@github.com>
This commit is contained in:
Max Brunsfeld 2018-06-11 15:36:18 -07:00
parent 6ee8f55084
commit e56df0fc7f
6 changed files with 591 additions and 452 deletions

View File

@ -71,3 +71,19 @@ Void tags
(attribute (attribute_name) (attribute_value))
(attribute (attribute_name) (attribute_value))))
(end_tag)))
==================================
Comments
==================================
<!-- hello -->
<div>
<!-- <span>something</span> -->
</div>
---
(fragment
(comment)
(element
(start_tag)
(comment)
(end_tag)))

View File

@ -1,6 +1,11 @@
module.exports = grammar({
name: 'html',
extras: $ => [
$.comment,
/\s+/,
],
externals: $ => [
$._open_start_tag,
$._close_start_tag,
@ -8,6 +13,7 @@ module.exports = grammar({
$.end_tag,
$._implicit_end_tag,
$._erroneous_end_tag,
$.comment,
],
rules: {
@ -41,17 +47,19 @@ module.exports = grammar({
),
attribute: $ => seq(
alias($._attribute_part, $.attribute_name),
$.attribute_name,
optional(seq(
'=',
choice(
alias($._attribute_part, $.attribute_value),
$.attribute_value,
$.quoted_attribute_value
)
))
),
_attribute_part: $ => /[^<>"'/=\s]+/,
attribute_name: $ => /[^<>"'/=\s]+/,
attribute_value: $ => /[^<>"'=\s]+/,
quoted_attribute_value: $ => choice(
seq("'", optional(alias(/[^']+/, $.attribute_value)), "'"),

30
src/grammar.json vendored
View File

@ -107,13 +107,8 @@
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_attribute_part"
},
"named": true,
"value": "attribute_name"
"name": "attribute_name"
},
{
"type": "CHOICE",
@ -129,13 +124,8 @@
"type": "CHOICE",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_attribute_part"
},
"named": true,
"value": "attribute_value"
"name": "attribute_value"
},
{
"type": "SYMBOL",
@ -152,10 +142,14 @@
}
]
},
"_attribute_part": {
"attribute_name": {
"type": "PATTERN",
"value": "[^<>\"'\\/=\\s]+"
},
"attribute_value": {
"type": "PATTERN",
"value": "[^<>\"'=\\s]+"
},
"quoted_attribute_value": {
"type": "CHOICE",
"members": [
@ -227,9 +221,13 @@
}
},
"extras": [
{
"type": "SYMBOL",
"name": "comment"
},
{
"type": "PATTERN",
"value": "\\s"
"value": "\\s+"
}
],
"conflicts": [],
@ -257,6 +255,10 @@
{
"type": "SYMBOL",
"name": "_erroneous_end_tag"
},
{
"type": "SYMBOL",
"name": "comment"
}
],
"inline": []

930
src/parser.c vendored

File diff suppressed because it is too large Load Diff

46
src/scanner.cc vendored
View File

@ -18,6 +18,7 @@ enum TokenType {
END_TAG,
IMPLICIT_END_TAG,
ERRONEOUS_END_TAG,
COMMENT,
};
struct Scanner {
@ -66,7 +67,43 @@ struct Scanner {
return tag_name;
}
// Scans the remainder of an HTML comment.
//
// The caller is expected to have already consumed the leading "<!"; this
// function then requires the next two characters to be "--" and consumes
// everything up to and including the first "-->" terminator.
//
// On success, sets `lexer->result_symbol` to COMMENT, marks the token end
// just past the closing '>', and returns true. Returns false if the "--"
// opener is missing or if the lookahead becomes 0 (presumably end of input
// -- confirm against the tree-sitter lexer contract) before a terminator
// is found.
bool comment(TSLexer *lexer) {
  // Require the "--" that follows "<!".
  if (lexer->lookahead != '-') return false;
  lexer->advance(lexer, false);
  if (lexer->lookahead != '-') return false;
  lexer->advance(lexer, false);

  // Number of consecutive '-' characters immediately preceding the current
  // lookahead character.
  unsigned dashes = 0;
  auto c = lexer->lookahead;
  while (c) {
    switch (c) {
      case '-':
        ++dashes;
        break;
      case '>':
        if (dashes >= 2) {
          lexer->result_symbol = COMMENT;
          lexer->advance(lexer, false);
          lexer->mark_end(lexer);
          return true;
        }
        // A '>' that does not close the comment interrupts the dash run.
        // Without this reset, input such as "->->" would be counted as two
        // dashes followed by '>' and end the comment prematurely.
        dashes = 0;
        break;
      default:
        dashes = 0;
        break;
    }
    lexer->advance(lexer, false);
    c = lexer->lookahead;
  }

  // Ran out of input without seeing "-->".
  return false;
}
bool start_tag(TSLexer *lexer) {
if (!tags.empty() && tags.back().is_void()) {
tags.pop_back();
lexer->result_symbol = IMPLICIT_END_TAG;
return true;
}
auto tag_name = scan_tag_name(lexer);
if (tag_name.empty()) return false;
@ -115,15 +152,22 @@ struct Scanner {
switch (lexer->lookahead) {
case '<':
if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '!') {
lexer->advance(lexer, false);
return comment(lexer);
}
if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) {
if (lexer->lookahead == '/') {
lexer->advance(lexer, false);
return end_tag(lexer);
}
return start_tag(lexer);
}
break;
case '>':

View File

@ -1 +0,0 @@
<form><img src=something.png><br><input type=submit value=Ok /></form>