Add comments, allow slashes in unquoted attribute values

Co-Authored-By: Ashi Krishan <queerviolet@github.com>
This commit is contained in:
Max Brunsfeld 2018-06-11 15:36:18 -07:00
parent 6ee8f55084
commit e56df0fc7f
6 changed files with 591 additions and 452 deletions

View File

@ -71,3 +71,19 @@ Void tags
(attribute (attribute_name) (attribute_value)) (attribute (attribute_name) (attribute_value))
(attribute (attribute_name) (attribute_value)))) (attribute (attribute_name) (attribute_value))))
(end_tag))) (end_tag)))
==================================
Comments
==================================
<!-- hello -->
<div>
<!-- <span>something</span> -->
</div>
---
(fragment
(comment)
(element
(start_tag)
(comment)
(end_tag)))

View File

@ -1,6 +1,11 @@
module.exports = grammar({ module.exports = grammar({
name: 'html', name: 'html',
extras: $ => [
$.comment,
/\s+/,
],
externals: $ => [ externals: $ => [
$._open_start_tag, $._open_start_tag,
$._close_start_tag, $._close_start_tag,
@ -8,6 +13,7 @@ module.exports = grammar({
$.end_tag, $.end_tag,
$._implicit_end_tag, $._implicit_end_tag,
$._erroneous_end_tag, $._erroneous_end_tag,
$.comment,
], ],
rules: { rules: {
@ -41,17 +47,19 @@ module.exports = grammar({
), ),
attribute: $ => seq( attribute: $ => seq(
alias($._attribute_part, $.attribute_name), $.attribute_name,
optional(seq( optional(seq(
'=', '=',
choice( choice(
alias($._attribute_part, $.attribute_value), $.attribute_value,
$.quoted_attribute_value $.quoted_attribute_value
) )
)) ))
), ),
_attribute_part: $ => /[^<>"'/=\s]+/, attribute_name: $ => /[^<>"'/=\s]+/,
attribute_value: $ => /[^<>"'=\s]+/,
quoted_attribute_value: $ => choice( quoted_attribute_value: $ => choice(
seq("'", optional(alias(/[^']+/, $.attribute_value)), "'"), seq("'", optional(alias(/[^']+/, $.attribute_value)), "'"),

34
src/grammar.json vendored
View File

@ -107,13 +107,8 @@
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
"type": "ALIAS", "type": "SYMBOL",
"content": { "name": "attribute_name"
"type": "SYMBOL",
"name": "_attribute_part"
},
"named": true,
"value": "attribute_name"
}, },
{ {
"type": "CHOICE", "type": "CHOICE",
@ -129,13 +124,8 @@
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "ALIAS", "type": "SYMBOL",
"content": { "name": "attribute_value"
"type": "SYMBOL",
"name": "_attribute_part"
},
"named": true,
"value": "attribute_value"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@ -152,10 +142,14 @@
} }
] ]
}, },
"_attribute_part": { "attribute_name": {
"type": "PATTERN", "type": "PATTERN",
"value": "[^<>\"'\\/=\\s]+" "value": "[^<>\"'\\/=\\s]+"
}, },
"attribute_value": {
"type": "PATTERN",
"value": "[^<>\"'=\\s]+"
},
"quoted_attribute_value": { "quoted_attribute_value": {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
@ -227,9 +221,13 @@
} }
}, },
"extras": [ "extras": [
{
"type": "SYMBOL",
"name": "comment"
},
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "\\s" "value": "\\s+"
} }
], ],
"conflicts": [], "conflicts": [],
@ -257,6 +255,10 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_erroneous_end_tag" "name": "_erroneous_end_tag"
},
{
"type": "SYMBOL",
"name": "comment"
} }
], ],
"inline": [] "inline": []

930
src/parser.c vendored

File diff suppressed because it is too large Load Diff

48
src/scanner.cc vendored
View File

@ -18,6 +18,7 @@ enum TokenType {
END_TAG, END_TAG,
IMPLICIT_END_TAG, IMPLICIT_END_TAG,
ERRONEOUS_END_TAG, ERRONEOUS_END_TAG,
COMMENT,
}; };
struct Scanner { struct Scanner {
@ -66,7 +67,43 @@ struct Scanner {
return tag_name; return tag_name;
} }
// Scans the remainder of an HTML comment. The visible call site invokes this
// after consuming `<!`, so the lookahead is expected to be the first `-` of
// the `<!--` opener.
//
// On success, sets result_symbol to COMMENT, marks the token end just past
// the closing `>` of `-->`, and returns true. Returns false if the `--`
// opener is missing, or if lookahead becomes 0 (presumably end of input)
// before a terminator is seen.
bool comment(TSLexer *lexer) {
  // Require the two dashes that complete the `<!--` opener.
  if (lexer->lookahead != '-') return false;
  lexer->advance(lexer, false);
  if (lexer->lookahead != '-') return false;
  lexer->advance(lexer, false);

  // Length of the unbroken run of '-' characters immediately before the
  // current lookahead. The opener's own dashes are deliberately not counted.
  unsigned dashes = 0;
  while (lexer->lookahead) {
    switch (lexer->lookahead) {
      case '-':
        ++dashes;
        break;
      case '>':
        if (dashes >= 2) {
          lexer->result_symbol = COMMENT;
          // Include the closing '>' in the token before marking its end.
          lexer->advance(lexer, false);
          lexer->mark_end(lexer);
          return true;
        }
        // A '>' that does not terminate the comment still interrupts the
        // dash run. Without this reset, `->->` would be counted as two
        // consecutive dashes and wrongly accepted as the `-->` terminator.
        dashes = 0;
        break;
      default:
        dashes = 0;
        break;
    }
    lexer->advance(lexer, false);
  }
  return false;
}
bool start_tag(TSLexer *lexer) { bool start_tag(TSLexer *lexer) {
if (!tags.empty() && tags.back().is_void()) {
tags.pop_back();
lexer->result_symbol = IMPLICIT_END_TAG;
return true;
}
auto tag_name = scan_tag_name(lexer); auto tag_name = scan_tag_name(lexer);
if (tag_name.empty()) return false; if (tag_name.empty()) return false;
@ -115,15 +152,22 @@ struct Scanner {
switch (lexer->lookahead) { switch (lexer->lookahead) {
case '<': case '<':
if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) { lexer->mark_end(lexer);
lexer->mark_end(lexer); lexer->advance(lexer, false);
if (lexer->lookahead == '!') {
lexer->advance(lexer, false); lexer->advance(lexer, false);
return comment(lexer);
}
if (valid_symbols[OPEN_START_TAG] || valid_symbols[END_TAG]) {
if (lexer->lookahead == '/') { if (lexer->lookahead == '/') {
lexer->advance(lexer, false); lexer->advance(lexer, false);
return end_tag(lexer); return end_tag(lexer);
} }
return start_tag(lexer); return start_tag(lexer);
} }
break; break;
case '>': case '>':

View File

@ -1 +0,0 @@
<form><img src=something.png><br><input type=submit value=Ok /></form>