Always lex braces and brackets as separate tokens
The lexer needs to always return braces and brackets separately so that the parser can decide if they are part of some construct like an array subscript or a variable expansion. This means that there was no point in moving bare-word tokenization into the external scanner. I've moved it back into the normal scanner. The tricky part is how to deal with the separate '}' and ']' tokens in the case where they are *not* part of a subscript or an expansion. For example, in code like `echo {hi}`, the syntax tree should still clearly indicate that only *one* argument is being passed to echo. For now, we achieve this by grouping the '{', hi, and '}' tokens into a single `concatenation` node, which is a bit odd, but it's the best we can do.
This commit is contained in:
parent
c34619a1c4
commit
6f81608535
|
@ -139,12 +139,12 @@ function do_yet_another_thing {
|
||||||
|
|
||||||
(program
|
(program
|
||||||
(function_definition
|
(function_definition
|
||||||
(variable_name)
|
(word)
|
||||||
(compound_statement (command (command_name (word)) (word))))
|
(compound_statement (command (command_name (word)) (word))))
|
||||||
(function_definition
|
(function_definition
|
||||||
(variable_name)
|
(word)
|
||||||
(compound_statement (command (command_name (word)) (word))))
|
(compound_statement (command (command_name (word)) (word))))
|
||||||
(function_definition
|
(function_definition
|
||||||
(variable_name)
|
(word)
|
||||||
(compound_statement (command (command_name (word)) (word)))
|
(compound_statement (command (command_name (word)) (word)))
|
||||||
(file_redirect (file_descriptor) (word))))
|
(file_redirect (file_descriptor) (word))))
|
||||||
|
|
|
@ -15,6 +15,7 @@ echo a b
|
||||||
Words with special characters
|
Words with special characters
|
||||||
=============================
|
=============================
|
||||||
|
|
||||||
|
echo {o[k]}
|
||||||
echo }}}
|
echo }}}
|
||||||
echo ]]] ===
|
echo ]]] ===
|
||||||
[[ "35d8b" =~ ^[0-9a-fA-F] ]] || echo {nomatch}
|
[[ "35d8b" =~ ^[0-9a-fA-F] ]] || echo {nomatch}
|
||||||
|
@ -22,11 +23,19 @@ echo ]]] ===
|
||||||
---
|
---
|
||||||
|
|
||||||
(program
|
(program
|
||||||
|
(command (command_name (word)) (concatenation (word) (word)))
|
||||||
(command (command_name (word)) (word))
|
(command (command_name (word)) (word))
|
||||||
(command (command_name (word)) (word) (word))
|
(command (command_name (word)) (word) (word))
|
||||||
(list
|
(list
|
||||||
(command (command_name (word)) (string) (word) (word) (word))
|
(command
|
||||||
(command (command_name (word)) (word))))
|
(command_name (word))
|
||||||
|
(string)
|
||||||
|
(word)
|
||||||
|
(concatenation (word) (word))
|
||||||
|
(word))
|
||||||
|
(command
|
||||||
|
(command_name (word))
|
||||||
|
(concatenation (word)))))
|
||||||
|
|
||||||
=============================
|
=============================
|
||||||
Simple variable expansions
|
Simple variable expansions
|
||||||
|
@ -57,6 +66,32 @@ echo ${abc:
|
||||||
(command (command_name (word)) (expansion (variable_name)))
|
(command (command_name (word)) (expansion (variable_name)))
|
||||||
(command (command_name (word)) (expansion (variable_name))))
|
(command (command_name (word)) (expansion (variable_name))))
|
||||||
|
|
||||||
|
===================================
|
||||||
|
Variable expansions in strings
|
||||||
|
===================================
|
||||||
|
|
||||||
|
A="${A:-$B/c}"
|
||||||
|
A="${b/$c/$d}"
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
(program
|
||||||
|
(variable_assignment
|
||||||
|
(variable_name)
|
||||||
|
(string
|
||||||
|
(expansion
|
||||||
|
(variable_name)
|
||||||
|
(concatenation (simple_expansion (variable_name)) (word)))))
|
||||||
|
(variable_assignment
|
||||||
|
(variable_name)
|
||||||
|
(string
|
||||||
|
(expansion
|
||||||
|
(variable_name)
|
||||||
|
(concatenation
|
||||||
|
(simple_expansion (variable_name))
|
||||||
|
(word)
|
||||||
|
(simple_expansion (variable_name)))))))
|
||||||
|
|
||||||
===================================
|
===================================
|
||||||
Other variable expansion operators
|
Other variable expansion operators
|
||||||
===================================
|
===================================
|
||||||
|
@ -248,8 +283,8 @@ a+=(foo "bar" $(baz))
|
||||||
(program
|
(program
|
||||||
(variable_assignment (variable_name) (array))
|
(variable_assignment (variable_name) (array))
|
||||||
(variable_assignment (variable_name) (array (word) (word) (word)))
|
(variable_assignment (variable_name) (array (word) (word) (word)))
|
||||||
(command (command_name (word)) (expansion (variable_name)))
|
(command (command_name (word)) (expansion (subscript (variable_name) (word))))
|
||||||
(command (command_name (word)) (expansion (variable_name)))
|
(command (command_name (word)) (expansion (subscript (variable_name) (word))))
|
||||||
(variable_assignment
|
(variable_assignment
|
||||||
(subscript (variable_name) (simple_expansion (variable_name)))
|
(subscript (variable_name) (simple_expansion (variable_name)))
|
||||||
(word))
|
(word))
|
||||||
|
|
112
grammar.js
112
grammar.js
|
@ -1,3 +1,16 @@
|
||||||
|
// Characters that can never appear unescaped inside a bare `word` token.
// Each entry is spliced verbatim into a negated regex character class by
// `noneOf`, so entries that are regex escapes ('\\[', '\\]', '\\s') are
// written pre-escaped; the lone backslash entry is escaped by `noneOf`.
//
// '|' is included because it is a shell metacharacter: without it, input
// such as `a|b` would be lexed as a single word instead of a pipeline.
//
// NOTE: the generated parser embeds the resulting pattern, so it must be
// regenerated after changing this list.
const SPECIAL_CHARACTERS = [
  "'", '"',
  '<', '>',
  '{', '}',
  '\\[', '\\]',
  '(', ')',
  '`', '$',
  '|', '&', ';',
  '\\',
  '\\s',
  '#',
];
|
||||||
|
|
||||||
module.exports = grammar({
|
module.exports = grammar({
|
||||||
name: 'bash',
|
name: 'bash',
|
||||||
|
|
||||||
|
@ -6,9 +19,8 @@ module.exports = grammar({
|
||||||
$._terminator,
|
$._terminator,
|
||||||
$._expression,
|
$._expression,
|
||||||
$._primary_expression,
|
$._primary_expression,
|
||||||
$._variable_name,
|
|
||||||
$._simple_variable_name,
|
$._simple_variable_name,
|
||||||
$._simple_word,
|
$._special_variable_name,
|
||||||
],
|
],
|
||||||
|
|
||||||
externals: $ => [
|
externals: $ => [
|
||||||
|
@ -17,13 +29,12 @@ module.exports = grammar({
|
||||||
$._heredoc_middle,
|
$._heredoc_middle,
|
||||||
$._heredoc_end,
|
$._heredoc_end,
|
||||||
$.file_descriptor,
|
$.file_descriptor,
|
||||||
$.word,
|
|
||||||
$._empty_value,
|
$._empty_value,
|
||||||
$._concat,
|
$._concat,
|
||||||
$.variable_name, // Variable name followed by an operator like '=' or '+='
|
$.variable_name, // Variable name followed by an operator like '=' or '+='
|
||||||
'\n',
|
|
||||||
']',
|
|
||||||
'}',
|
'}',
|
||||||
|
']',
|
||||||
|
'\n',
|
||||||
],
|
],
|
||||||
|
|
||||||
extras: $ => [
|
extras: $ => [
|
||||||
|
@ -118,8 +129,8 @@ module.exports = grammar({
|
||||||
|
|
||||||
function_definition: $ => seq(
|
function_definition: $ => seq(
|
||||||
choice(
|
choice(
|
||||||
seq('function', $._simple_variable_name, optional(seq('(', ')'))),
|
seq('function', $.word, optional(seq('(', ')'))),
|
||||||
seq($._simple_variable_name, '(', ')')
|
seq($.word, '(', ')')
|
||||||
),
|
),
|
||||||
$.compound_statement,
|
$.compound_statement,
|
||||||
optional($.file_redirect)
|
optional($.file_redirect)
|
||||||
|
@ -162,7 +173,8 @@ module.exports = grammar({
|
||||||
repeat($._expression),
|
repeat($._expression),
|
||||||
repeat(choice(
|
repeat(choice(
|
||||||
$.file_redirect,
|
$.file_redirect,
|
||||||
$.heredoc_redirect
|
$.heredoc_redirect,
|
||||||
|
$.herestring_redirect
|
||||||
))
|
))
|
||||||
)),
|
)),
|
||||||
|
|
||||||
|
@ -201,7 +213,9 @@ module.exports = grammar({
|
||||||
$.variable_name,
|
$.variable_name,
|
||||||
'[',
|
'[',
|
||||||
$._expression,
|
$._expression,
|
||||||
']'
|
optional($._concat),
|
||||||
|
']',
|
||||||
|
optional($._concat)
|
||||||
),
|
),
|
||||||
|
|
||||||
file_redirect: $ => prec.left(seq(
|
file_redirect: $ => prec.left(seq(
|
||||||
|
@ -228,16 +242,21 @@ module.exports = grammar({
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
herestring_redirect: $ => seq(
|
||||||
|
'<<<',
|
||||||
|
$._expression
|
||||||
|
),
|
||||||
|
|
||||||
// Expressions
|
// Expressions
|
||||||
|
|
||||||
_expression: $ => choice(
|
_expression: $ => choice(
|
||||||
$.concatenation,
|
$.concatenation,
|
||||||
$._primary_expression
|
$._primary_expression,
|
||||||
|
alias(prec(-2, $._special_characters), $.word)
|
||||||
),
|
),
|
||||||
|
|
||||||
_primary_expression: $ => choice(
|
_primary_expression: $ => choice(
|
||||||
$.word,
|
$.word,
|
||||||
$._simple_word,
|
|
||||||
$.string,
|
$.string,
|
||||||
$.raw_string,
|
$.raw_string,
|
||||||
$.expansion,
|
$.expansion,
|
||||||
|
@ -247,22 +266,36 @@ module.exports = grammar({
|
||||||
),
|
),
|
||||||
|
|
||||||
concatenation: $ => prec(-1, seq(
|
concatenation: $ => prec(-1, seq(
|
||||||
|
choice(
|
||||||
$._primary_expression,
|
$._primary_expression,
|
||||||
repeat1(seq($._concat, $._primary_expression))
|
$._special_characters,
|
||||||
|
),
|
||||||
|
repeat1(prec(-1, seq(
|
||||||
|
$._concat,
|
||||||
|
choice(
|
||||||
|
$._primary_expression,
|
||||||
|
$._special_characters,
|
||||||
|
)
|
||||||
|
))),
|
||||||
)),
|
)),
|
||||||
|
|
||||||
|
_special_characters: $ => token(prec(-1, repeat1(choice('{', '}', '[', ']')))),
|
||||||
|
|
||||||
string: $ => seq(
|
string: $ => seq(
|
||||||
'"',
|
'"',
|
||||||
repeat(choice(
|
repeat(seq(
|
||||||
|
choice(
|
||||||
$._string_content,
|
$._string_content,
|
||||||
$.expansion,
|
$.expansion,
|
||||||
$.simple_expansion,
|
$.simple_expansion,
|
||||||
$.command_substitution
|
$.command_substitution
|
||||||
|
),
|
||||||
|
optional($._concat)
|
||||||
)),
|
)),
|
||||||
'"'
|
'"'
|
||||||
),
|
),
|
||||||
|
|
||||||
_string_content: $ => /([^"`$]|\\.)*/,
|
_string_content: $ => /([^"`$]|\\.)+/,
|
||||||
|
|
||||||
array: $ => seq(
|
array: $ => seq(
|
||||||
'(',
|
'(',
|
||||||
|
@ -274,30 +307,37 @@ module.exports = grammar({
|
||||||
|
|
||||||
simple_expansion: $ => seq(
|
simple_expansion: $ => seq(
|
||||||
'$',
|
'$',
|
||||||
$._variable_name
|
choice($._simple_variable_name, $._special_variable_name)
|
||||||
),
|
),
|
||||||
|
|
||||||
expansion: $ => seq(
|
expansion: $ => seq(
|
||||||
'${',
|
'${',
|
||||||
|
optional('#'),
|
||||||
choice(
|
choice(
|
||||||
$._variable_name,
|
|
||||||
seq('#', $._variable_name),
|
|
||||||
seq('#', $._variable_name, '[', '@', ']'),
|
|
||||||
seq($._variable_name, '[', '@', ']'),
|
|
||||||
seq(
|
seq(
|
||||||
$._variable_name,
|
$.variable_name,
|
||||||
choice(':', ':?', '=', ':-', '%', '/'),
|
'=',
|
||||||
optional(seq($._expression, optional($._concat)))
|
optional(seq(
|
||||||
)
|
$._expression
|
||||||
|
))
|
||||||
|
),
|
||||||
|
seq(
|
||||||
|
choice(
|
||||||
|
$.subscript,
|
||||||
|
$._simple_variable_name,
|
||||||
|
$._special_variable_name
|
||||||
|
),
|
||||||
|
optional(seq(
|
||||||
|
choice(':', ':?', '=', ':-', '%', '/', '-'),
|
||||||
|
optional(seq(
|
||||||
|
$._expression
|
||||||
|
))
|
||||||
|
))
|
||||||
|
),
|
||||||
),
|
),
|
||||||
'}'
|
'}'
|
||||||
),
|
),
|
||||||
|
|
||||||
_variable_name: $ => choice(
|
|
||||||
$._simple_variable_name,
|
|
||||||
$.special_variable_name
|
|
||||||
),
|
|
||||||
|
|
||||||
command_substitution: $ => choice(
|
command_substitution: $ => choice(
|
||||||
seq('$(', $._statement, ')'),
|
seq('$(', $._statement, ')'),
|
||||||
prec(1, seq('`', $._statement, '`'))
|
prec(1, seq('`', $._statement, '`'))
|
||||||
|
@ -311,14 +351,20 @@ module.exports = grammar({
|
||||||
|
|
||||||
comment: $ => token(prec(-1, /#.*/)),
|
comment: $ => token(prec(-1, /#.*/)),
|
||||||
|
|
||||||
_simple_variable_name: $ => alias($.identifier, $.variable_name),
|
_simple_variable_name: $ => alias(/\w+/, $.variable_name),
|
||||||
|
|
||||||
_simple_word: $ => alias($.identifier, $.word),
|
_special_variable_name: $ => alias(choice('*', '@', '?', '-', '$', '0', '_'), $.special_variable_name),
|
||||||
|
|
||||||
identifier: $ => /\w+/,
|
word: $ => token(repeat1(choice(
|
||||||
|
noneOf(...SPECIAL_CHARACTERS),
|
||||||
special_variable_name: $ => choice('*', '@', '#', '?', '-', '$', '!', '0', '_'),
|
seq('\\', noneOf('\\s'))
|
||||||
|
))),
|
||||||
|
|
||||||
_terminator: $ => choice(';', ';;', '\n', '&')
|
_terminator: $ => choice(';', ';;', '\n', '&')
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Builds a regex that matches any single character NOT in the given set.
 *
 * Each argument is either:
 *   - a single literal character, which is escaped when it has special
 *     meaning inside a character class (`\`, `]`, `[`, `^`, `-`), or
 *   - a multi-character fragment that is already valid inside a character
 *     class (e.g. '\\s', '\\[', 'a-z'), which is inserted verbatim.
 *
 * @param {...string} characters - Characters or class fragments to exclude.
 * @returns {RegExp} A negated character class, e.g. /[^'"\s]/.
 */
function noneOf(...characters) {
  // Single characters that would otherwise terminate, negate, or form a
  // range inside `[...]`.
  const classMetaCharacters = new Set(['\\', ']', '[', '^', '-']);
  const negatedString = characters
    .map((c) => (classMetaCharacters.has(c) ? `\\${c}` : c))
    .join('');
  return new RegExp(`[^${negatedString}]`);
}
|
||||||
|
|
|
@ -325,7 +325,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_simple_variable_name"
|
"name": "word"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
|
@ -355,7 +355,7 @@
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_simple_variable_name"
|
"name": "word"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
|
@ -548,6 +548,10 @@
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "heredoc_redirect"
|
"name": "heredoc_redirect"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "herestring_redirect"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -681,9 +685,33 @@
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_expression"
|
"name": "_expression"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_concat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "BLANK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "]"
|
"value": "]"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_concat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "BLANK"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -809,6 +837,19 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"herestring_redirect": {
|
||||||
|
"type": "SEQ",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "<<<"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_expression"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
"_expression": {
|
"_expression": {
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
|
@ -819,6 +860,19 @@
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_primary_expression"
|
"name": "_primary_expression"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "ALIAS",
|
||||||
|
"content": {
|
||||||
|
"type": "PREC",
|
||||||
|
"value": -2,
|
||||||
|
"content": {
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_special_characters"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"named": true,
|
||||||
|
"value": "word"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -829,10 +883,6 @@
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "word"
|
"name": "word"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "_simple_word"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "string"
|
"name": "string"
|
||||||
|
@ -864,13 +914,25 @@
|
||||||
"value": -1,
|
"value": -1,
|
||||||
"content": {
|
"content": {
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_primary_expression"
|
"name": "_primary_expression"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_special_characters"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "REPEAT1",
|
"type": "REPEAT1",
|
||||||
|
"content": {
|
||||||
|
"type": "PREC",
|
||||||
|
"value": -1,
|
||||||
"content": {
|
"content": {
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
"members": [
|
"members": [
|
||||||
|
@ -878,16 +940,57 @@
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_concat"
|
"name": "_concat"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_primary_expression"
|
"name": "_primary_expression"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_special_characters"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"_special_characters": {
|
||||||
|
"type": "TOKEN",
|
||||||
|
"content": {
|
||||||
|
"type": "PREC",
|
||||||
|
"value": -1,
|
||||||
|
"content": {
|
||||||
|
"type": "REPEAT1",
|
||||||
|
"content": {
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "{"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "["
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "]"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"string": {
|
"string": {
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
"members": [
|
"members": [
|
||||||
|
@ -898,6 +1001,9 @@
|
||||||
{
|
{
|
||||||
"type": "REPEAT",
|
"type": "REPEAT",
|
||||||
"content": {
|
"content": {
|
||||||
|
"type": "SEQ",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
|
@ -917,6 +1023,20 @@
|
||||||
"name": "command_substitution"
|
"name": "command_substitution"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_concat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "BLANK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -927,7 +1047,7 @@
|
||||||
},
|
},
|
||||||
"_string_content": {
|
"_string_content": {
|
||||||
"type": "PATTERN",
|
"type": "PATTERN",
|
||||||
"value": "([^\"`$]|\\\\.)*"
|
"value": "([^\"`$]|\\\\.)+"
|
||||||
},
|
},
|
||||||
"array": {
|
"array": {
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
|
@ -960,9 +1080,18 @@
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "$"
|
"value": "$"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_variable_name"
|
"name": "_simple_variable_name"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_special_variable_name"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -975,21 +1104,46 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "_variable_name"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "SEQ",
|
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "#"
|
"value": "#"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "BLANK"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SEQ",
|
||||||
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_variable_name"
|
"name": "variable_name"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "="
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SEQ",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "_expression"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "BLANK"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -997,55 +1151,28 @@
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "CHOICE",
|
||||||
"value": "#"
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "SYMBOL",
|
||||||
|
"name": "subscript"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_variable_name"
|
"name": "_simple_variable_name"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "SYMBOL",
|
||||||
"value": "["
|
"name": "_special_variable_name"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "@"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "]"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "SEQ",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "_variable_name"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "["
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "@"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "]"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "_variable_name"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
|
@ -1072,6 +1199,10 @@
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "/"
|
"value": "/"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "-"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -1084,13 +1215,8 @@
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_expression"
|
"name": "_expression"
|
||||||
},
|
}
|
||||||
{
|
]
|
||||||
"type": "CHOICE",
|
|
||||||
"members": [
|
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "_concat"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "BLANK"
|
"type": "BLANK"
|
||||||
|
@ -1114,19 +1240,6 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"_variable_name": {
|
|
||||||
"type": "CHOICE",
|
|
||||||
"members": [
|
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "_simple_variable_name"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "special_variable_name"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"command_substitution": {
|
"command_substitution": {
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
|
@ -1210,26 +1323,15 @@
|
||||||
"_simple_variable_name": {
|
"_simple_variable_name": {
|
||||||
"type": "ALIAS",
|
"type": "ALIAS",
|
||||||
"content": {
|
"content": {
|
||||||
"type": "SYMBOL",
|
"type": "PATTERN",
|
||||||
"name": "identifier"
|
"value": "\\w+"
|
||||||
},
|
},
|
||||||
"named": true,
|
"named": true,
|
||||||
"value": "variable_name"
|
"value": "variable_name"
|
||||||
},
|
},
|
||||||
"_simple_word": {
|
"_special_variable_name": {
|
||||||
"type": "ALIAS",
|
"type": "ALIAS",
|
||||||
"content": {
|
"content": {
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "identifier"
|
|
||||||
},
|
|
||||||
"named": true,
|
|
||||||
"value": "word"
|
|
||||||
},
|
|
||||||
"identifier": {
|
|
||||||
"type": "PATTERN",
|
|
||||||
"value": "\\w+"
|
|
||||||
},
|
|
||||||
"special_variable_name": {
|
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
|
@ -1240,10 +1342,6 @@
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "@"
|
"value": "@"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "#"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "?"
|
"value": "?"
|
||||||
|
@ -1256,10 +1354,6 @@
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "$"
|
"value": "$"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"type": "STRING",
|
|
||||||
"value": "!"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "0"
|
"value": "0"
|
||||||
|
@ -1270,6 +1364,37 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"named": true,
|
||||||
|
"value": "special_variable_name"
|
||||||
|
},
|
||||||
|
"word": {
|
||||||
|
"type": "TOKEN",
|
||||||
|
"content": {
|
||||||
|
"type": "REPEAT1",
|
||||||
|
"content": {
|
||||||
|
"type": "CHOICE",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "PATTERN",
|
||||||
|
"value": "[^'\"<>{}\\[\\]()`$&;\\\\\\s#]"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "SEQ",
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"type": "STRING",
|
||||||
|
"value": "\\"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "PATTERN",
|
||||||
|
"value": "[^\\s]"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"_terminator": {
|
"_terminator": {
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
"members": [
|
"members": [
|
||||||
|
@ -1336,10 +1461,6 @@
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "file_descriptor"
|
"name": "file_descriptor"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"type": "SYMBOL",
|
|
||||||
"name": "word"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "_empty_value"
|
"name": "_empty_value"
|
||||||
|
@ -1354,7 +1475,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "\n"
|
"value": "}"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
|
@ -1362,7 +1483,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "STRING",
|
"type": "STRING",
|
||||||
"value": "}"
|
"value": "\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"inline": [
|
"inline": [
|
||||||
|
@ -1370,8 +1491,7 @@
|
||||||
"_terminator",
|
"_terminator",
|
||||||
"_expression",
|
"_expression",
|
||||||
"_primary_expression",
|
"_primary_expression",
|
||||||
"_variable_name",
|
|
||||||
"_simple_variable_name",
|
"_simple_variable_name",
|
||||||
"_simple_word"
|
"_special_variable_name"
|
||||||
]
|
]
|
||||||
}
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -12,13 +12,12 @@ enum TokenType {
|
||||||
HEREDOC_MIDDLE,
|
HEREDOC_MIDDLE,
|
||||||
HEREDOC_END,
|
HEREDOC_END,
|
||||||
FILE_DESCRIPTOR,
|
FILE_DESCRIPTOR,
|
||||||
WORD,
|
|
||||||
EMPTY_VALUE,
|
EMPTY_VALUE,
|
||||||
CONCAT,
|
CONCAT,
|
||||||
VARIABLE_NAME,
|
VARIABLE_NAME,
|
||||||
NEWLINE,
|
|
||||||
CLOSING_BRACKET,
|
|
||||||
CLOSING_BRACE,
|
CLOSING_BRACE,
|
||||||
|
CLOSING_BRACKET,
|
||||||
|
NEWLINE,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Scanner {
|
struct Scanner {
|
||||||
|
@ -92,13 +91,12 @@ struct Scanner {
|
||||||
lexer->lookahead == '<' ||
|
lexer->lookahead == '<' ||
|
||||||
lexer->lookahead == ')' ||
|
lexer->lookahead == ')' ||
|
||||||
lexer->lookahead == '(' ||
|
lexer->lookahead == '(' ||
|
||||||
lexer->lookahead == '[' ||
|
|
||||||
lexer->lookahead == '|' ||
|
|
||||||
lexer->lookahead == ']' ||
|
|
||||||
lexer->lookahead == '}' ||
|
|
||||||
lexer->lookahead == ';' ||
|
lexer->lookahead == ';' ||
|
||||||
lexer->lookahead == '&' ||
|
lexer->lookahead == '&' ||
|
||||||
lexer->lookahead == '`'
|
lexer->lookahead == '`' ||
|
||||||
|
lexer->lookahead == 0 ||
|
||||||
|
(lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) ||
|
||||||
|
(lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET])
|
||||||
)) {
|
)) {
|
||||||
lexer->result_symbol = CONCAT;
|
lexer->result_symbol = CONCAT;
|
||||||
return true;
|
return true;
|
||||||
|
@ -134,9 +132,7 @@ struct Scanner {
|
||||||
return scan_heredoc_content(lexer, HEREDOC_BEGINNING, SIMPLE_HEREDOC);
|
return scan_heredoc_content(lexer, HEREDOC_BEGINNING, SIMPLE_HEREDOC);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[WORD]) {
|
if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) {
|
||||||
unsigned length = 0;
|
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (
|
if (
|
||||||
lexer->lookahead == ' ' ||
|
lexer->lookahead == ' ' ||
|
||||||
|
@ -145,70 +141,46 @@ struct Scanner {
|
||||||
) {
|
) {
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
} else if (lexer->lookahead == '\\') {
|
} else if (lexer->lookahead == '\\') {
|
||||||
advance(lexer);
|
skip(lexer);
|
||||||
if (lexer->lookahead == '\n') {
|
if (lexer->lookahead == '\n') {
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
} else {
|
} else {
|
||||||
length++;
|
return false;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_numeric = iswdigit(lexer->lookahead);
|
bool is_number = true;
|
||||||
bool is_alphanumeric = iswalpha(lexer->lookahead);
|
if (iswdigit(lexer->lookahead)) {
|
||||||
|
advance(lexer);
|
||||||
|
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
||||||
|
is_number = false;
|
||||||
|
advance(lexer);
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
// These characters are not allowed in unquoted arguments
|
if (iswdigit(lexer->lookahead)) {
|
||||||
// or environment variable names
|
advance(lexer);
|
||||||
if (
|
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
||||||
lexer->lookahead == 0 ||
|
is_number = false;
|
||||||
lexer->lookahead == ';' ||
|
advance(lexer);
|
||||||
lexer->lookahead == '"' ||
|
} else {
|
||||||
lexer->lookahead == '(' ||
|
break;
|
||||||
lexer->lookahead == ')' ||
|
}
|
||||||
lexer->lookahead == '\'' ||
|
}
|
||||||
lexer->lookahead == '&' ||
|
|
||||||
lexer->lookahead == '#' ||
|
|
||||||
lexer->lookahead == '`' ||
|
|
||||||
lexer->lookahead == '|' ||
|
|
||||||
lexer->lookahead == '$' ||
|
|
||||||
iswspace(lexer->lookahead)
|
|
||||||
) break;
|
|
||||||
|
|
||||||
// Curly braces are not allowed in unquoted arguments within curly braces
|
if (is_number &&
|
||||||
// (e.g. inside of a variable expansion like `${key:arg}`).
|
valid_symbols[FILE_DESCRIPTOR] &&
|
||||||
if (
|
(lexer->lookahead == '>' || lexer->lookahead == '<')) {
|
||||||
lexer->lookahead == '}' &&
|
|
||||||
valid_symbols[CLOSING_BRACE]
|
|
||||||
) break;
|
|
||||||
|
|
||||||
// Square brackets are not allowed in unquoted arguments within square brackets
|
|
||||||
// (e.g. inside of an array subscript like `a[arg]`).
|
|
||||||
if (
|
|
||||||
lexer->lookahead == ']' &&
|
|
||||||
valid_symbols[CLOSING_BRACKET]
|
|
||||||
) break;
|
|
||||||
|
|
||||||
// Numbers followed by '<' and '>' at the beginning of commands
|
|
||||||
// are parsed as file descriptors.
|
|
||||||
if (lexer->lookahead == '<' || lexer->lookahead == '>') {
|
|
||||||
if (is_numeric && valid_symbols[FILE_DESCRIPTOR]) {
|
|
||||||
lexer->result_symbol = FILE_DESCRIPTOR;
|
lexer->result_symbol = FILE_DESCRIPTOR;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!iswdigit(lexer->lookahead)) is_numeric = false;
|
if (valid_symbols[VARIABLE_NAME]) {
|
||||||
|
|
||||||
if (!iswalnum(lexer->lookahead) && lexer->lookahead != '_') {
|
|
||||||
|
|
||||||
// Alphanumeric strings followed by '=', '[', or '+=' are treated
|
|
||||||
// as environment variable names.
|
|
||||||
if (is_alphanumeric && valid_symbols[VARIABLE_NAME] && length > 0) {
|
|
||||||
if (lexer->lookahead == '+') {
|
if (lexer->lookahead == '+') {
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
|
@ -224,19 +196,7 @@ struct Scanner {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
is_alphanumeric = false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
advance(lexer);
|
|
||||||
length++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do not handle strings containing only letters, because those
|
|
||||||
// might be keywords. Let the normal lexer handle those.
|
|
||||||
if (length > 0 && valid_symbols[WORD] && !is_alphanumeric) {
|
|
||||||
lexer->result_symbol = WORD;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in New Issue