Ignore heredoc end delimiter unless on a new line (#28)

2022-03-04 16:33:24 +01:00 · 2022-03-04 16:33:24 +01:00 · 8c8c14af1a
parent a11a686303
commit 8c8c14af1a
3 changed files with 86 additions and 16 deletions
--- a/grammar.js
+++ b/grammar.js
@ -888,11 +888,11 @@ function defineQuoted(start, end, name) {
    [`_quoted_i_${name}`]: ($) =>
      seq(
        field("quoted_start", start),
        optional(alias($[`_quoted_content_i_${name}`], $.quoted_content)),
        repeat(
-          choice(
+          seq(
-            alias($[`_quoted_content_i_${name}`], $.quoted_content),
+            choice($.interpolation, $.escape_sequence),
-            $.interpolation,
+            optional(alias($[`_quoted_content_i_${name}`], $.quoted_content))
            $.escape_sequence
          )
        ),
        field("quoted_end", end)
@ -901,11 +901,12 @@ function defineQuoted(start, end, name) {
    [`_quoted_${name}`]: ($) =>
      seq(
        field("quoted_start", start),
        optional(alias($[`_quoted_content_${name}`], $.quoted_content)),
        repeat(
-          choice(
+          seq(
-            alias($[`_quoted_content_${name}`], $.quoted_content),
+            // The end delimiter may be escaped in non-interpolating strings too
-            // The end delimiter may always be escaped
+            $.escape_sequence,
-            $.escape_sequence
+            optional(alias($[`_quoted_content_${name}`], $.quoted_content))
          )
        ),
        field("quoted_end", end)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@ -174,7 +174,22 @@ int8_t find_quoted_token_info(const bool* valid_symbols) {
 bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
  lexer->result_symbol = info.token_type;
  bool is_heredoc = (info.delimiter_length == 3);
  for (bool has_content = false; true; has_content = true) {
    bool newline = false;
    if (is_newline(lexer->lookahead)) {
      advance(lexer);
      has_content = true;
      newline = true;
      while (is_whitespace(lexer->lookahead)) {
        advance(lexer);
      }
    }
    lexer->mark_end(lexer);
    if (lexer->lookahead == info.end_delimiter) {
@ -189,7 +204,7 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
        }
      }
-      if (length == info.delimiter_length) {
+      if (length == info.delimiter_length && (!is_heredoc || newline)) {
        return has_content;
      }
    } else {
@ -199,16 +214,18 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
          return has_content;
        }
      } else if (lexer->lookahead == '\\') {
        if (info.supports_interpol) {
          return has_content;
        } else {
        advance(lexer);
-          if (lexer->lookahead == info.end_delimiter) {
+        if (is_heredoc && lexer->lookahead == '\n') {
          // We need to know about the newline to correctly recognise
          // heredoc end delimiter, so we intentionally ignore escaping
        } else if (info.supports_interpol || lexer->lookahead == info.end_delimiter) {
          return has_content;
        }
        }
      } else if (lexer->lookahead == '\0') {
-        return false;
+        // If we reached the end of the file, this means there is no
        // end delimiter, so the syntax is invalid. In that case we
        // want to treat all the scanned content as quoted content.
        return has_content;
      } else {
        advance(lexer);
      }
--- a/test/corpus/term/string.txt
+++ b/test/corpus/term/string.txt
@ -1,3 +1,14 @@
 =====================================
 empty
 =====================================
 ""
 ---
 (source
  (string))
 =====================================
 single line
 =====================================
@ -171,6 +182,47 @@ this is #{
        (quoted_content)))
    (quoted_content)))
 =====================================
 heredoc / delimiter in the middle
 =====================================
 """
 hey """
 """
 ---
 (source
  (string
    (quoted_content)))
 =====================================
 heredoc / escaped newline (ignored)
 =====================================
 """
 hey \
 """
  """
  hey \
  """
 """
 hey \
 there
 """
 ---
 (source
  (string
    (quoted_content))
  (string
    (quoted_content))
  (string
    (quoted_content)))
 =====================================
 heredoc / escaped delimiter
 =====================================