From da116a1941ae577fe0ac12112e61a3efd38d5526 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 19 Oct 2018 13:06:56 -0700 Subject: [PATCH] Handle heredocs w/ indented close delimiters Fixes atom/language-shellscript#127 --- corpus/commands.txt | 21 +++++++++++++++++++++ grammar.js | 2 ++ src/grammar.json | 8 ++++++++ src/parser.c | 32 +++++++++++++++++++++++++++++++- src/scanner.cc | 42 +++++++++++++++++++++++++++++++++++++----- 5 files changed, 99 insertions(+), 6 deletions(-) diff --git a/corpus/commands.txt b/corpus/commands.txt index ec9ce0c..cd605e6 100644 --- a/corpus/commands.txt +++ b/corpus/commands.txt @@ -203,3 +203,24 @@ EOF (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body) (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body (simple_expansion (variable_name))) (redirected_statement (command (command_name (word))) (heredoc_redirect (heredoc_start))) (heredoc_body)) + +========================================== +Heredocs with indented closing delimiters +========================================== + +usage() { + cat <<-EOF + Usage: ${0##*/} FOO BAR + EOF +} + +--- + +(program + (function_definition + (word) + (compound_statement + (redirected_statement + (command (command_name (word))) + (heredoc_redirect (heredoc_start))) + (heredoc_body (expansion (special_variable_name) (word)))))) diff --git a/grammar.js b/grammar.js index 64cf276..c95ac31 100644 --- a/grammar.js +++ b/grammar.js @@ -36,6 +36,8 @@ module.exports = grammar({ $.regex, '}', ']', + '<<', + '<<-', '\n', ], diff --git a/src/grammar.json b/src/grammar.json index 3bc53c4..7351fef 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -2254,6 +2254,14 @@ "type": "STRING", "value": "]" }, + { + "type": "STRING", + "value": "<<" + }, + { + "type": "STRING", + "value": "<<-" + }, { "type": "STRING", "value": "\n" diff --git a/src/parser.c b/src/parser.c index 219501a..3a6ddd5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -10,7 +10,7 @@ #define SYMBOL_COUNT 155 #define ALIAS_COUNT 2 #define TOKEN_COUNT 97 -#define EXTERNAL_TOKEN_COUNT 13 +#define EXTERNAL_TOKEN_COUNT 15 #define MAX_ALIAS_SEQUENCE_LENGTH 8 enum { @@ -11187,6 +11187,8 @@ enum { ts_external_token_regex, ts_external_token_RBRACE, ts_external_token_RBRACK, + ts_external_token_LT_LT, + ts_external_token_LT_LT_DASH, ts_external_token_LF, }; @@ -11203,6 +11205,8 @@ static TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = { [ts_external_token_regex] = sym_regex, [ts_external_token_RBRACE] = anon_sym_RBRACE, [ts_external_token_RBRACK] = anon_sym_RBRACK, + [ts_external_token_LT_LT] = anon_sym_LT_LT, + [ts_external_token_LT_LT_DASH] = anon_sym_LT_LT_DASH, [ts_external_token_LF] = anon_sym_LF, }; @@ -11220,6 +11224,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token_regex] = true, [ts_external_token_RBRACE] = true, [ts_external_token_RBRACK] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [2] = { @@ -11236,12 +11242,16 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token__heredoc_body_beginning] = true, [ts_external_token_file_descriptor] = true, [ts_external_token_variable_name] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [5] = { [ts_external_token__simple_heredoc_body] = true, [ts_external_token__heredoc_body_beginning] = true, [ts_external_token_file_descriptor] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [6] = { @@ -11249,6 +11259,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token__heredoc_body_beginning] = true, [ts_external_token_file_descriptor] = true, [ts_external_token__concat] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [7] = { @@ -11266,15 +11278,21 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [11] = { [ts_external_token_file_descriptor] = true, [ts_external_token_variable_name] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [12] = { [ts_external_token_file_descriptor] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [13] = { [ts_external_token_file_descriptor] = true, [ts_external_token__concat] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [14] = { @@ -11287,6 +11305,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token_file_descriptor] = true, [ts_external_token_variable_name] = true, [ts_external_token_RBRACE] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [16] = { @@ -11294,6 +11314,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token__heredoc_body_beginning] = true, [ts_external_token_file_descriptor] = true, [ts_external_token_RBRACE] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [17] = { @@ -11302,6 +11324,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token_file_descriptor] = true, [ts_external_token__concat] = true, [ts_external_token_RBRACE] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [18] = { @@ -11320,6 +11344,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token_file_descriptor] = true, [ts_external_token__concat] = true, [ts_external_token_variable_name] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [22] = { @@ -11341,6 +11367,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token_file_descriptor] = true, [ts_external_token__concat] = true, [ts_external_token_variable_name] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [27] = { @@ -11350,6 +11378,8 @@ static bool ts_external_scanner_states[31][EXTERNAL_TOKEN_COUNT] = { [ts_external_token__concat] = true, [ts_external_token_variable_name] = true, [ts_external_token_RBRACE] = true, + [ts_external_token_LT_LT] = true, + [ts_external_token_LT_LT_DASH] = true, [ts_external_token_LF] = true, }, [28] = { diff --git a/src/scanner.cc b/src/scanner.cc index 534357c..1e310eb 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -19,6 +19,8 @@ enum TokenType { REGEX, CLOSING_BRACE, CLOSING_BRACKET, + HEREDOC_ARROW, + HEREDOC_ARROW_DASH, NEWLINE, }; @@ -32,22 +34,25 @@ struct Scanner { } unsigned serialize(char *buffer) { - if (heredoc_delimiter.length() + 2 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; + if (heredoc_delimiter.length() + 3 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; buffer[0] = heredoc_is_raw; buffer[1] = started_heredoc; - heredoc_delimiter.copy(&buffer[2], heredoc_delimiter.length()); - return heredoc_delimiter.length() + 2; + buffer[2] = heredoc_allows_indent; + heredoc_delimiter.copy(&buffer[3], heredoc_delimiter.length()); + return heredoc_delimiter.length() + 3; } void deserialize(const char *buffer, unsigned length) { if (length == 0) { heredoc_is_raw = false; started_heredoc = false; + heredoc_allows_indent = false; heredoc_delimiter.clear(); } else { heredoc_is_raw = buffer[0]; started_heredoc = buffer[1]; - heredoc_delimiter.assign(&buffer[2], &buffer[length]); + heredoc_allows_indent = buffer[2]; + heredoc_delimiter.assign(&buffer[3], &buffer[length]); } } @@ -99,6 +104,7 @@ struct Scanner { if (did_advance) { heredoc_is_raw = false; started_heredoc = false; + heredoc_allows_indent = false; heredoc_delimiter.clear(); lexer->result_symbol = end_type; return true; @@ -131,9 +137,15 @@ struct Scanner { case '\n': { did_advance = true; advance(lexer); + if (heredoc_allows_indent) { + while (iswspace(lexer->lookahead)) { + advance(lexer); + } + } if (scan_heredoc_end_identifier(lexer)) { heredoc_is_raw = false; started_heredoc = false; + heredoc_allows_indent = false; heredoc_delimiter.clear(); lexer->result_symbol = end_type; return true; @@ -191,7 +203,7 @@ struct Scanner { return scan_heredoc_start(lexer); } - if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) { + if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) { for (;;) { if ( lexer->lookahead == ' ' || @@ -211,6 +223,25 @@ struct Scanner { } } + if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '<') { + advance(lexer); + if (lexer->lookahead == '-') { + advance(lexer); + heredoc_allows_indent = true; + lexer->result_symbol = HEREDOC_ARROW_DASH; + } else if (lexer->lookahead == '<') { + return false; + } else { + heredoc_allows_indent = false; + lexer->result_symbol = HEREDOC_ARROW; + } + return true; + } + return false; + } + bool is_number = true; if (iswdigit(lexer->lookahead)) { advance(lexer); @@ -321,6 +352,7 @@ struct Scanner { string heredoc_delimiter; bool heredoc_is_raw; bool started_heredoc; + bool heredoc_allows_indent; string current_leading_word; };