Always lex braces and brackets as separate tokens
The lexer must always return braces and brackets as separate tokens so that the parser can decide whether they are part of a construct such as an array subscript or a variable expansion. This means that there was no point in moving bare-word tokenization into the external scanner, so I've moved it back into the normal scanner. The tricky part is how to deal with the separate `}` and `]` tokens in the case where they are *not* part of a subscript or an expansion. For example, in code like `echo {hi}`, the syntax tree should still clearly indicate that only *one* argument is being passed to `echo`. For now, we achieve this by grouping the `{`, `hi`, and `}` tokens into a single `concatenation` node, which is a bit odd, but it's the best we can do.
This commit is contained in:
parent
c34619a1c4
commit
6f81608535
|
@ -139,12 +139,12 @@ function do_yet_another_thing {
|
|||
|
||||
(program
|
||||
(function_definition
|
||||
(variable_name)
|
||||
(word)
|
||||
(compound_statement (command (command_name (word)) (word))))
|
||||
(function_definition
|
||||
(variable_name)
|
||||
(word)
|
||||
(compound_statement (command (command_name (word)) (word))))
|
||||
(function_definition
|
||||
(variable_name)
|
||||
(word)
|
||||
(compound_statement (command (command_name (word)) (word)))
|
||||
(file_redirect (file_descriptor) (word))))
|
||||
|
|
|
@ -15,6 +15,7 @@ echo a b
|
|||
Words with special characters
|
||||
=============================
|
||||
|
||||
echo {o[k]}
|
||||
echo }}}
|
||||
echo ]]] ===
|
||||
[[ "35d8b" =~ ^[0-9a-fA-F] ]] || echo {nomatch}
|
||||
|
@ -22,11 +23,19 @@ echo ]]] ===
|
|||
---
|
||||
|
||||
(program
|
||||
(command (command_name (word)) (concatenation (word) (word)))
|
||||
(command (command_name (word)) (word))
|
||||
(command (command_name (word)) (word) (word))
|
||||
(list
|
||||
(command (command_name (word)) (string) (word) (word) (word))
|
||||
(command (command_name (word)) (word))))
|
||||
(command
|
||||
(command_name (word))
|
||||
(string)
|
||||
(word)
|
||||
(concatenation (word) (word))
|
||||
(word))
|
||||
(command
|
||||
(command_name (word))
|
||||
(concatenation (word)))))
|
||||
|
||||
=============================
|
||||
Simple variable expansions
|
||||
|
@ -57,6 +66,32 @@ echo ${abc:
|
|||
(command (command_name (word)) (expansion (variable_name)))
|
||||
(command (command_name (word)) (expansion (variable_name))))
|
||||
|
||||
===================================
|
||||
Variable expansions in strings
|
||||
===================================
|
||||
|
||||
A="${A:-$B/c}"
|
||||
A="${b/$c/$d}"
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(variable_assignment
|
||||
(variable_name)
|
||||
(string
|
||||
(expansion
|
||||
(variable_name)
|
||||
(concatenation (simple_expansion (variable_name)) (word)))))
|
||||
(variable_assignment
|
||||
(variable_name)
|
||||
(string
|
||||
(expansion
|
||||
(variable_name)
|
||||
(concatenation
|
||||
(simple_expansion (variable_name))
|
||||
(word)
|
||||
(simple_expansion (variable_name)))))))
|
||||
|
||||
===================================
|
||||
Other variable expansion operators
|
||||
===================================
|
||||
|
@ -248,8 +283,8 @@ a+=(foo "bar" $(baz))
|
|||
(program
|
||||
(variable_assignment (variable_name) (array))
|
||||
(variable_assignment (variable_name) (array (word) (word) (word)))
|
||||
(command (command_name (word)) (expansion (variable_name)))
|
||||
(command (command_name (word)) (expansion (variable_name)))
|
||||
(command (command_name (word)) (expansion (subscript (variable_name) (word))))
|
||||
(command (command_name (word)) (expansion (subscript (variable_name) (word))))
|
||||
(variable_assignment
|
||||
(subscript (variable_name) (simple_expansion (variable_name)))
|
||||
(word))
|
||||
|
|
112
grammar.js
112
grammar.js
|
@ -1,3 +1,16 @@
|
|||
const SPECIAL_CHARACTERS = [
|
||||
"'", '"',
|
||||
'<', '>',
|
||||
'{', '}',
|
||||
'\\[', '\\]',
|
||||
'(', ')',
|
||||
'`', '$',
|
||||
'&', ';',
|
||||
'\\',
|
||||
'\\s',
|
||||
'#',
|
||||
];
|
||||
|
||||
module.exports = grammar({
|
||||
name: 'bash',
|
||||
|
||||
|
@ -6,9 +19,8 @@ module.exports = grammar({
|
|||
$._terminator,
|
||||
$._expression,
|
||||
$._primary_expression,
|
||||
$._variable_name,
|
||||
$._simple_variable_name,
|
||||
$._simple_word,
|
||||
$._special_variable_name,
|
||||
],
|
||||
|
||||
externals: $ => [
|
||||
|
@ -17,13 +29,12 @@ module.exports = grammar({
|
|||
$._heredoc_middle,
|
||||
$._heredoc_end,
|
||||
$.file_descriptor,
|
||||
$.word,
|
||||
$._empty_value,
|
||||
$._concat,
|
||||
$.variable_name, // Variable name followed by an operator like '=' or '+='
|
||||
'\n',
|
||||
']',
|
||||
'}',
|
||||
']',
|
||||
'\n',
|
||||
],
|
||||
|
||||
extras: $ => [
|
||||
|
@ -118,8 +129,8 @@ module.exports = grammar({
|
|||
|
||||
function_definition: $ => seq(
|
||||
choice(
|
||||
seq('function', $._simple_variable_name, optional(seq('(', ')'))),
|
||||
seq($._simple_variable_name, '(', ')')
|
||||
seq('function', $.word, optional(seq('(', ')'))),
|
||||
seq($.word, '(', ')')
|
||||
),
|
||||
$.compound_statement,
|
||||
optional($.file_redirect)
|
||||
|
@ -162,7 +173,8 @@ module.exports = grammar({
|
|||
repeat($._expression),
|
||||
repeat(choice(
|
||||
$.file_redirect,
|
||||
$.heredoc_redirect
|
||||
$.heredoc_redirect,
|
||||
$.herestring_redirect
|
||||
))
|
||||
)),
|
||||
|
||||
|
@ -201,7 +213,9 @@ module.exports = grammar({
|
|||
$.variable_name,
|
||||
'[',
|
||||
$._expression,
|
||||
']'
|
||||
optional($._concat),
|
||||
']',
|
||||
optional($._concat)
|
||||
),
|
||||
|
||||
file_redirect: $ => prec.left(seq(
|
||||
|
@ -228,16 +242,21 @@ module.exports = grammar({
|
|||
)
|
||||
),
|
||||
|
||||
herestring_redirect: $ => seq(
|
||||
'<<<',
|
||||
$._expression
|
||||
),
|
||||
|
||||
// Expressions
|
||||
|
||||
_expression: $ => choice(
|
||||
$.concatenation,
|
||||
$._primary_expression
|
||||
$._primary_expression,
|
||||
alias(prec(-2, $._special_characters), $.word)
|
||||
),
|
||||
|
||||
_primary_expression: $ => choice(
|
||||
$.word,
|
||||
$._simple_word,
|
||||
$.string,
|
||||
$.raw_string,
|
||||
$.expansion,
|
||||
|
@ -247,22 +266,36 @@ module.exports = grammar({
|
|||
),
|
||||
|
||||
concatenation: $ => prec(-1, seq(
|
||||
choice(
|
||||
$._primary_expression,
|
||||
repeat1(seq($._concat, $._primary_expression))
|
||||
$._special_characters,
|
||||
),
|
||||
repeat1(prec(-1, seq(
|
||||
$._concat,
|
||||
choice(
|
||||
$._primary_expression,
|
||||
$._special_characters,
|
||||
)
|
||||
))),
|
||||
)),
|
||||
|
||||
_special_characters: $ => token(prec(-1, repeat1(choice('{', '}', '[', ']')))),
|
||||
|
||||
string: $ => seq(
|
||||
'"',
|
||||
repeat(choice(
|
||||
repeat(seq(
|
||||
choice(
|
||||
$._string_content,
|
||||
$.expansion,
|
||||
$.simple_expansion,
|
||||
$.command_substitution
|
||||
),
|
||||
optional($._concat)
|
||||
)),
|
||||
'"'
|
||||
),
|
||||
|
||||
_string_content: $ => /([^"`$]|\\.)*/,
|
||||
_string_content: $ => /([^"`$]|\\.)+/,
|
||||
|
||||
array: $ => seq(
|
||||
'(',
|
||||
|
@ -274,30 +307,37 @@ module.exports = grammar({
|
|||
|
||||
simple_expansion: $ => seq(
|
||||
'$',
|
||||
$._variable_name
|
||||
choice($._simple_variable_name, $._special_variable_name)
|
||||
),
|
||||
|
||||
expansion: $ => seq(
|
||||
'${',
|
||||
optional('#'),
|
||||
choice(
|
||||
$._variable_name,
|
||||
seq('#', $._variable_name),
|
||||
seq('#', $._variable_name, '[', '@', ']'),
|
||||
seq($._variable_name, '[', '@', ']'),
|
||||
seq(
|
||||
$._variable_name,
|
||||
choice(':', ':?', '=', ':-', '%', '/'),
|
||||
optional(seq($._expression, optional($._concat)))
|
||||
)
|
||||
$.variable_name,
|
||||
'=',
|
||||
optional(seq(
|
||||
$._expression
|
||||
))
|
||||
),
|
||||
seq(
|
||||
choice(
|
||||
$.subscript,
|
||||
$._simple_variable_name,
|
||||
$._special_variable_name
|
||||
),
|
||||
optional(seq(
|
||||
choice(':', ':?', '=', ':-', '%', '/', '-'),
|
||||
optional(seq(
|
||||
$._expression
|
||||
))
|
||||
))
|
||||
),
|
||||
),
|
||||
'}'
|
||||
),
|
||||
|
||||
_variable_name: $ => choice(
|
||||
$._simple_variable_name,
|
||||
$.special_variable_name
|
||||
),
|
||||
|
||||
command_substitution: $ => choice(
|
||||
seq('$(', $._statement, ')'),
|
||||
prec(1, seq('`', $._statement, '`'))
|
||||
|
@ -311,14 +351,20 @@ module.exports = grammar({
|
|||
|
||||
comment: $ => token(prec(-1, /#.*/)),
|
||||
|
||||
_simple_variable_name: $ => alias($.identifier, $.variable_name),
|
||||
_simple_variable_name: $ => alias(/\w+/, $.variable_name),
|
||||
|
||||
_simple_word: $ => alias($.identifier, $.word),
|
||||
_special_variable_name: $ => alias(choice('*', '@', '?', '-', '$', '0', '_'), $.special_variable_name),
|
||||
|
||||
identifier: $ => /\w+/,
|
||||
|
||||
special_variable_name: $ => choice('*', '@', '#', '?', '-', '$', '!', '0', '_'),
|
||||
word: $ => token(repeat1(choice(
|
||||
noneOf(...SPECIAL_CHARACTERS),
|
||||
seq('\\', noneOf('\\s'))
|
||||
))),
|
||||
|
||||
_terminator: $ => choice(';', ';;', '\n', '&')
|
||||
}
|
||||
});
|
||||
|
||||
function noneOf(...characters) {
|
||||
const negatedString = characters.map(c => c == '\\' ? '\\\\' : c).join('')
|
||||
return new RegExp('[^' + negatedString + ']')
|
||||
}
|
||||
|
|
|
@ -325,7 +325,7 @@
|
|||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_simple_variable_name"
|
||||
"name": "word"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
|
@ -355,7 +355,7 @@
|
|||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_simple_variable_name"
|
||||
"name": "word"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
|
@ -548,6 +548,10 @@
|
|||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "heredoc_redirect"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "herestring_redirect"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -681,9 +685,33 @@
|
|||
"type": "SYMBOL",
|
||||
"name": "_expression"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_concat"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "]"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_concat"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -809,6 +837,19 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"herestring_redirect": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<<<"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_expression"
|
||||
}
|
||||
]
|
||||
},
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
|
@ -819,6 +860,19 @@
|
|||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_primary_expression"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": -2,
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_special_characters"
|
||||
}
|
||||
},
|
||||
"named": true,
|
||||
"value": "word"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -829,10 +883,6 @@
|
|||
"type": "SYMBOL",
|
||||
"name": "word"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_simple_word"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "string"
|
||||
|
@ -864,13 +914,25 @@
|
|||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_primary_expression"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_special_characters"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "REPEAT1",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
|
@ -878,16 +940,57 @@
|
|||
"type": "SYMBOL",
|
||||
"name": "_concat"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_primary_expression"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_special_characters"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"_special_characters": {
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "REPEAT1",
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "{"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "}"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "["
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "]"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"string": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
|
@ -898,6 +1001,9 @@
|
|||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
|
@ -917,6 +1023,20 @@
|
|||
"name": "command_substitution"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_concat"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -927,7 +1047,7 @@
|
|||
},
|
||||
"_string_content": {
|
||||
"type": "PATTERN",
|
||||
"value": "([^\"`$]|\\\\.)*"
|
||||
"value": "([^\"`$]|\\\\.)+"
|
||||
},
|
||||
"array": {
|
||||
"type": "SEQ",
|
||||
|
@ -960,9 +1080,18 @@
|
|||
"type": "STRING",
|
||||
"value": "$"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_variable_name"
|
||||
"name": "_simple_variable_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_special_variable_name"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -975,21 +1104,46 @@
|
|||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_variable_name"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "#"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_variable_name"
|
||||
"name": "variable_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "="
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_expression"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -997,55 +1151,28 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "#"
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "subscript"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_variable_name"
|
||||
"name": "_simple_variable_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "["
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "@"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "]"
|
||||
"type": "SYMBOL",
|
||||
"name": "_special_variable_name"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_variable_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "["
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "@"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "]"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_variable_name"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
|
@ -1072,6 +1199,10 @@
|
|||
{
|
||||
"type": "STRING",
|
||||
"value": "/"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "-"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -1084,13 +1215,8 @@
|
|||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_expression"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_concat"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
|
@ -1114,19 +1240,6 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"_variable_name": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_simple_variable_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "special_variable_name"
|
||||
}
|
||||
]
|
||||
},
|
||||
"command_substitution": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
|
@ -1210,26 +1323,15 @@
|
|||
"_simple_variable_name": {
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
"type": "PATTERN",
|
||||
"value": "\\w+"
|
||||
},
|
||||
"named": true,
|
||||
"value": "variable_name"
|
||||
},
|
||||
"_simple_word": {
|
||||
"_special_variable_name": {
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
"named": true,
|
||||
"value": "word"
|
||||
},
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\w+"
|
||||
},
|
||||
"special_variable_name": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
|
@ -1240,10 +1342,6 @@
|
|||
"type": "STRING",
|
||||
"value": "@"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "#"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "?"
|
||||
|
@ -1256,10 +1354,6 @@
|
|||
"type": "STRING",
|
||||
"value": "$"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "!"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "0"
|
||||
|
@ -1270,6 +1364,37 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"named": true,
|
||||
"value": "special_variable_name"
|
||||
},
|
||||
"word": {
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "REPEAT1",
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "[^'\"<>{}\\[\\]()`$&;\\\\\\s#]"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\\"
|
||||
},
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\s]"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"_terminator": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
|
@ -1336,10 +1461,6 @@
|
|||
"type": "SYMBOL",
|
||||
"name": "file_descriptor"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "word"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_empty_value"
|
||||
|
@ -1354,7 +1475,7 @@
|
|||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\n"
|
||||
"value": "}"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
|
@ -1362,7 +1483,7 @@
|
|||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "}"
|
||||
"value": "\n"
|
||||
}
|
||||
],
|
||||
"inline": [
|
||||
|
@ -1370,8 +1491,7 @@
|
|||
"_terminator",
|
||||
"_expression",
|
||||
"_primary_expression",
|
||||
"_variable_name",
|
||||
"_simple_variable_name",
|
||||
"_simple_word"
|
||||
"_special_variable_name"
|
||||
]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -12,13 +12,12 @@ enum TokenType {
|
|||
HEREDOC_MIDDLE,
|
||||
HEREDOC_END,
|
||||
FILE_DESCRIPTOR,
|
||||
WORD,
|
||||
EMPTY_VALUE,
|
||||
CONCAT,
|
||||
VARIABLE_NAME,
|
||||
NEWLINE,
|
||||
CLOSING_BRACKET,
|
||||
CLOSING_BRACE,
|
||||
CLOSING_BRACKET,
|
||||
NEWLINE,
|
||||
};
|
||||
|
||||
struct Scanner {
|
||||
|
@ -92,13 +91,12 @@ struct Scanner {
|
|||
lexer->lookahead == '<' ||
|
||||
lexer->lookahead == ')' ||
|
||||
lexer->lookahead == '(' ||
|
||||
lexer->lookahead == '[' ||
|
||||
lexer->lookahead == '|' ||
|
||||
lexer->lookahead == ']' ||
|
||||
lexer->lookahead == '}' ||
|
||||
lexer->lookahead == ';' ||
|
||||
lexer->lookahead == '&' ||
|
||||
lexer->lookahead == '`'
|
||||
lexer->lookahead == '`' ||
|
||||
lexer->lookahead == 0 ||
|
||||
(lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) ||
|
||||
(lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET])
|
||||
)) {
|
||||
lexer->result_symbol = CONCAT;
|
||||
return true;
|
||||
|
@ -134,9 +132,7 @@ struct Scanner {
|
|||
return scan_heredoc_content(lexer, HEREDOC_BEGINNING, SIMPLE_HEREDOC);
|
||||
}
|
||||
|
||||
if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[WORD]) {
|
||||
unsigned length = 0;
|
||||
|
||||
if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) {
|
||||
for (;;) {
|
||||
if (
|
||||
lexer->lookahead == ' ' ||
|
||||
|
@ -145,70 +141,46 @@ struct Scanner {
|
|||
) {
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '\\') {
|
||||
advance(lexer);
|
||||
skip(lexer);
|
||||
if (lexer->lookahead == '\n') {
|
||||
skip(lexer);
|
||||
} else {
|
||||
length++;
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_numeric = iswdigit(lexer->lookahead);
|
||||
bool is_alphanumeric = iswalpha(lexer->lookahead);
|
||||
bool is_number = true;
|
||||
if (iswdigit(lexer->lookahead)) {
|
||||
advance(lexer);
|
||||
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
||||
is_number = false;
|
||||
advance(lexer);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
// These characters are not allowed in unquoted arguments
|
||||
// or environment variable names
|
||||
if (
|
||||
lexer->lookahead == 0 ||
|
||||
lexer->lookahead == ';' ||
|
||||
lexer->lookahead == '"' ||
|
||||
lexer->lookahead == '(' ||
|
||||
lexer->lookahead == ')' ||
|
||||
lexer->lookahead == '\'' ||
|
||||
lexer->lookahead == '&' ||
|
||||
lexer->lookahead == '#' ||
|
||||
lexer->lookahead == '`' ||
|
||||
lexer->lookahead == '|' ||
|
||||
lexer->lookahead == '$' ||
|
||||
iswspace(lexer->lookahead)
|
||||
) break;
|
||||
if (iswdigit(lexer->lookahead)) {
|
||||
advance(lexer);
|
||||
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
||||
is_number = false;
|
||||
advance(lexer);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Curly braces are not allowed in unquoted arguments within curly braces
|
||||
// (e.g. inside of a variable expansion like `${key:arg}`).
|
||||
if (
|
||||
lexer->lookahead == '}' &&
|
||||
valid_symbols[CLOSING_BRACE]
|
||||
) break;
|
||||
|
||||
// Square brackets are not allowed in unquoted arguments within square brackets
|
||||
// (e.g. inside of an array subscript like `a[arg]`).
|
||||
if (
|
||||
lexer->lookahead == ']' &&
|
||||
valid_symbols[CLOSING_BRACKET]
|
||||
) break;
|
||||
|
||||
// Numbers followed by '<' and '>' at the beginning of commands
|
||||
// are parsed as file descriptors.
|
||||
if (lexer->lookahead == '<' || lexer->lookahead == '>') {
|
||||
if (is_numeric && valid_symbols[FILE_DESCRIPTOR]) {
|
||||
if (is_number &&
|
||||
valid_symbols[FILE_DESCRIPTOR] &&
|
||||
(lexer->lookahead == '>' || lexer->lookahead == '<')) {
|
||||
lexer->result_symbol = FILE_DESCRIPTOR;
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!iswdigit(lexer->lookahead)) is_numeric = false;
|
||||
|
||||
if (!iswalnum(lexer->lookahead) && lexer->lookahead != '_') {
|
||||
|
||||
// Alphanumeric strings followed by '=', '[', or '+=' are treated
|
||||
// as environment variable names.
|
||||
if (is_alphanumeric && valid_symbols[VARIABLE_NAME] && length > 0) {
|
||||
if (valid_symbols[VARIABLE_NAME]) {
|
||||
if (lexer->lookahead == '+') {
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
|
@ -224,19 +196,7 @@ struct Scanner {
|
|||
}
|
||||
}
|
||||
|
||||
is_alphanumeric = false;
|
||||
}
|
||||
|
||||
advance(lexer);
|
||||
length++;
|
||||
}
|
||||
|
||||
// Do not handle strings containing only letters, because those
|
||||
// might be keywords. Let the normal lexer handle those.
|
||||
if (length > 0 && valid_symbols[WORD] && !is_alphanumeric) {
|
||||
lexer->result_symbol = WORD;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
Loading…
Reference in New Issue