Ignore heredoc end delimiter unless on a new line (#28)

This commit is contained in:
Jonatan Kłosko 2022-03-04 16:33:24 +01:00 committed by GitHub
parent a11a686303
commit 8c8c14af1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 16 deletions

View File

@ -888,11 +888,11 @@ function defineQuoted(start, end, name) {
[`_quoted_i_${name}`]: ($) => [`_quoted_i_${name}`]: ($) =>
seq( seq(
field("quoted_start", start), field("quoted_start", start),
optional(alias($[`_quoted_content_i_${name}`], $.quoted_content)),
repeat( repeat(
choice( seq(
alias($[`_quoted_content_i_${name}`], $.quoted_content), choice($.interpolation, $.escape_sequence),
$.interpolation, optional(alias($[`_quoted_content_i_${name}`], $.quoted_content))
$.escape_sequence
) )
), ),
field("quoted_end", end) field("quoted_end", end)
@ -901,11 +901,12 @@ function defineQuoted(start, end, name) {
[`_quoted_${name}`]: ($) => [`_quoted_${name}`]: ($) =>
seq( seq(
field("quoted_start", start), field("quoted_start", start),
optional(alias($[`_quoted_content_${name}`], $.quoted_content)),
repeat( repeat(
choice( seq(
alias($[`_quoted_content_${name}`], $.quoted_content), // The end delimiter may be escaped in non-interpolating strings too
// The end delimiter may always be escaped $.escape_sequence,
$.escape_sequence optional(alias($[`_quoted_content_${name}`], $.quoted_content))
) )
), ),
field("quoted_end", end) field("quoted_end", end)

View File

@ -174,7 +174,22 @@ int8_t find_quoted_token_info(const bool* valid_symbols) {
bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) { bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
lexer->result_symbol = info.token_type; lexer->result_symbol = info.token_type;
bool is_heredoc = (info.delimiter_length == 3);
for (bool has_content = false; true; has_content = true) { for (bool has_content = false; true; has_content = true) {
bool newline = false;
if (is_newline(lexer->lookahead)) {
advance(lexer);
has_content = true;
newline = true;
while (is_whitespace(lexer->lookahead)) {
advance(lexer);
}
}
lexer->mark_end(lexer); lexer->mark_end(lexer);
if (lexer->lookahead == info.end_delimiter) { if (lexer->lookahead == info.end_delimiter) {
@ -189,7 +204,7 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
} }
} }
if (length == info.delimiter_length) { if (length == info.delimiter_length && (!is_heredoc || newline)) {
return has_content; return has_content;
} }
} else { } else {
@ -199,16 +214,18 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
return has_content; return has_content;
} }
} else if (lexer->lookahead == '\\') { } else if (lexer->lookahead == '\\') {
if (info.supports_interpol) { advance(lexer);
if (is_heredoc && lexer->lookahead == '\n') {
// We need to know about the newline to correctly recognise
// heredoc end delimiter, so we intentionally ignore escaping
} else if (info.supports_interpol || lexer->lookahead == info.end_delimiter) {
return has_content; return has_content;
} else {
advance(lexer);
if (lexer->lookahead == info.end_delimiter) {
return has_content;
}
} }
} else if (lexer->lookahead == '\0') { } else if (lexer->lookahead == '\0') {
return false; // If we reached the end of the file, this means there is no
// end delimiter, so the syntax is invalid. In that case we
// want to treat all the scanned content as quoted content.
return has_content;
} else { } else {
advance(lexer); advance(lexer);
} }

View File

@ -1,3 +1,14 @@
=====================================
empty
=====================================
""
---
(source
(string))
===================================== =====================================
single line single line
===================================== =====================================
@ -171,6 +182,47 @@ this is #{
(quoted_content))) (quoted_content)))
(quoted_content))) (quoted_content)))
=====================================
heredoc / delimiter in the middle
=====================================
"""
hey """
"""
---
(source
(string
(quoted_content)))
=====================================
heredoc / escaped newline (ignored)
=====================================
"""
hey \
"""
"""
hey \
"""
"""
hey \
there
"""
---
(source
(string
(quoted_content))
(string
(quoted_content))
(string
(quoted_content)))
===================================== =====================================
heredoc / escaped delimiter heredoc / escaped delimiter
===================================== =====================================