Don't parse square bracket commands as special syntax

This commit is contained in:
Max Brunsfeld 2018-02-27 22:22:57 -08:00
parent c410548caf
commit c34619a1c4
6 changed files with 32525 additions and 32077 deletions

View File

@ -75,10 +75,12 @@ fi
(program
(if_statement
(bracket_command
(command
(command_name (word))
(string (command_substitution (command (command_name (word)))))
(word)
(raw_string))
(raw_string)
(word))
(command (command_name (word)) (word))))
====================================

View File

@ -25,7 +25,7 @@ echo ]]] ===
(command (command_name (word)) (word))
(command (command_name (word)) (word) (word))
(list
(bracket_command (string) (word) (word))
(command (command_name (word)) (string) (word) (word) (word))
(command (command_name (word)) (word))))
=============================

View File

@ -23,7 +23,6 @@ module.exports = grammar({
$.variable_name, // Variable name followed by an operator like '=' or '+='
'\n',
']',
']]',
'}',
],
@ -46,7 +45,6 @@ module.exports = grammar({
$.variable_assignment,
$.command,
$.declaration_command,
$.bracket_command,
$.for_statement,
$.while_statement,
$.if_statement,
@ -153,11 +151,6 @@ module.exports = grammar({
$._statement
)),
// Grammar rule for bracket-delimited commands. It accepts either of two
// forms, each wrapping one or more `_expression` nodes:
//   - single brackets: `[` ... `]`
//   - double brackets: `[[` ... `]]`
// The opening and closing delimiters must be of the same kind; the two
// alternatives are never mixed.
bracket_command: $ => choice(
seq('[', repeat1($._expression), ']'),
seq('[[', repeat1($._expression), ']]')
),
// Commands
command: $ => prec.left(seq(

53
src/grammar.json vendored
View File

@ -36,10 +36,6 @@
"type": "SYMBOL",
"name": "declaration_command"
},
{
"type": "SYMBOL",
"name": "bracket_command"
},
{
"type": "SYMBOL",
"name": "for_statement"
@ -507,51 +503,6 @@
]
}
},
"bracket_command": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "["
},
{
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
},
{
"type": "STRING",
"value": "]"
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "[["
},
{
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
},
{
"type": "STRING",
"value": "]]"
}
]
}
]
},
"command": {
"type": "PREC_LEFT",
"value": 0,
@ -1409,10 +1360,6 @@
"type": "STRING",
"value": "]"
},
{
"type": "STRING",
"value": "]]"
},
{
"type": "STRING",
"value": "}"

64412
src/parser.c vendored

File diff suppressed because it is too large. (Load Diff)

92
src/scanner.cc vendored
View File

@ -18,7 +18,6 @@ enum TokenType {
VARIABLE_NAME,
NEWLINE,
CLOSING_BRACKET,
CLOSING_DOUBLE_BRACKET,
CLOSING_BRACE,
};
@ -158,50 +157,58 @@ struct Scanner {
}
}
bool is_number = true;
bool is_alphanumeric = true;
bool is_numeric = iswdigit(lexer->lookahead);
bool is_alphanumeric = iswalpha(lexer->lookahead);
for (;;) {
if (iswdigit(lexer->lookahead)) {
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
is_number = false;
} else if (
!iswspace(lexer->lookahead) &&
lexer->lookahead != 0 &&
lexer->lookahead != '"' &&
lexer->lookahead != '\'' &&
lexer->lookahead != '`' &&
lexer->lookahead != '>' &&
lexer->lookahead != '<' &&
lexer->lookahead != '#' &&
lexer->lookahead != '|' &&
lexer->lookahead != '(' &&
lexer->lookahead != ')' &&
lexer->lookahead != ';' &&
lexer->lookahead != '&' &&
lexer->lookahead != '$'
) {
if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) break;
if (lexer->lookahead == ']' && length == 0 && (valid_symbols[CLOSING_BRACKET] || valid_symbols[CLOSING_DOUBLE_BRACKET])) break;
if (is_alphanumeric && valid_symbols[VARIABLE_NAME] && (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == '+')) break;
is_alphanumeric = false;
} else {
break;
}
// These characters are not allowed in unquoted arguments
// or environment variable names
if (
lexer->lookahead == 0 ||
lexer->lookahead == ';' ||
lexer->lookahead == '"' ||
lexer->lookahead == '(' ||
lexer->lookahead == ')' ||
lexer->lookahead == '\'' ||
lexer->lookahead == '&' ||
lexer->lookahead == '#' ||
lexer->lookahead == '`' ||
lexer->lookahead == '|' ||
lexer->lookahead == '$' ||
iswspace(lexer->lookahead)
) break;
advance(lexer);
length++;
}
// Curly braces are not allowed in unquoted arguments within curly braces
// (e.g. inside of a variable expansion like `${key:arg}`).
if (
lexer->lookahead == '}' &&
valid_symbols[CLOSING_BRACE]
) break;
if (length == 0) return false;
// Square brackets are not allowed in unquoted arguments within square brackets
// (e.g. inside of an array subscript like `a[arg]`).
if (
lexer->lookahead == ']' &&
valid_symbols[CLOSING_BRACKET]
) break;
if (is_number &&
valid_symbols[FILE_DESCRIPTOR] &&
(lexer->lookahead == '>' || lexer->lookahead == '<')) {
// Numbers followed by '<' and '>' at the beginning of commands
// are parsed as file descriptors.
if (lexer->lookahead == '<' || lexer->lookahead == '>') {
if (is_numeric && valid_symbols[FILE_DESCRIPTOR]) {
lexer->result_symbol = FILE_DESCRIPTOR;
return true;
}
break;
}
if (valid_symbols[VARIABLE_NAME]) {
if (!iswdigit(lexer->lookahead)) is_numeric = false;
if (!iswalnum(lexer->lookahead) && lexer->lookahead != '_') {
// Alphanumeric strings followed by '=', '[', or '+=' are treated
// as environment variable names.
if (is_alphanumeric && valid_symbols[VARIABLE_NAME] && length > 0) {
if (lexer->lookahead == '+') {
lexer->mark_end(lexer);
advance(lexer);
@ -217,7 +224,16 @@ struct Scanner {
}
}
if (valid_symbols[WORD] && !is_alphanumeric) {
is_alphanumeric = false;
}
advance(lexer);
length++;
}
// Do not handle strings containing only letters, because those
// might be keywords. Let the normal lexer handle those.
if (length > 0 && valid_symbols[WORD] && !is_alphanumeric) {
lexer->result_symbol = WORD;
return true;
}