From 4f3cbc36b91868b8d9c819543acd2f3f3135eb4c Mon Sep 17 00:00:00 2001
From: Jason Rudolph
Date: Wed, 1 May 2019 16:59:21 -0400
Subject: [PATCH] Handle heredoc delimiters that include spaces (#47)

* Handle heredoc delimiters that include spaces

* Update test to use multi-line heredoc to demonstrate bug

This change demonstrates the bug described in https://github.com/tree-sitter/tree-sitter-bash/pull/47#discussion_r280183823.

* Handle multi-line heredocs with spaces in delimiter

Fixes the failing test introduced in 0d8adbc335.

/xref https://github.com/tree-sitter/tree-sitter-bash/pull/47#discussion_r280183823
---
 corpus/commands.txt | 6 ++++++
 src/scanner.cc      | 9 +++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/corpus/commands.txt b/corpus/commands.txt
index 114f3a8..dca8148 100644
--- a/corpus/commands.txt
+++ b/corpus/commands.txt
@@ -212,6 +212,11 @@ cat << "EOF"
 a=$b
 EOF
 
+cat <<"END OF FILE"
+hello,
+world
+END OF FILE
+
 cat << \EOF
 EOF
 
@@ -220,6 +225,7 @@ EOF
 (program
   (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body)
   (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body (simple_expansion (variable_name)))
+  (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body)
   (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body))
 
 ==========================================
diff --git a/src/scanner.cc b/src/scanner.cc
index b79c02c..ed9fb8c 100644
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -74,7 +74,7 @@ struct Scanner {
       advance(lexer);
     }
 
-    while (iswalpha(lexer->lookahead)) {
+    while (iswalpha(lexer->lookahead) || (quote != 0 && iswspace(lexer->lookahead))) {
       heredoc_delimiter += lexer->lookahead;
       advance(lexer);
     }
@@ -88,7 +88,12 @@ struct Scanner {
 
   bool scan_heredoc_end_identifier(TSLexer *lexer) {
     current_leading_word.clear();
-    while (iswalpha(lexer->lookahead)) {
+    // Scan the first 'n' characters on this line, to see if they match the heredoc delimiter
+    while (
+      lexer->lookahead != '\0' &&
+      lexer->lookahead != '\n' &&
+      current_leading_word.length() < heredoc_delimiter.length()
+    ) {
       current_leading_word += lexer->lookahead;
       advance(lexer);
     }