Reorganize, rename a bunch of things

This commit is contained in:
Max Brunsfeld 2017-07-15 22:13:55 -07:00
parent 7424d9fc9e
commit 1b5e1557f2
8 changed files with 17604 additions and 14084 deletions

@ -14,13 +14,13 @@ Commands with arguments
===============================
cat file1.txt
cat -n file1.txt file2.txt
git diff --word-diff=color -- file1.txt file2.txt
---
(program
(command (command_name) (argument))
(command (command_name) (argument) (argument) (argument)))
(command (command_name) (word))
(command (command_name) (word) (word) (word) (word) (word)))
===============================
Commands with quoted arguments
@ -32,10 +32,10 @@ echo 'hi'
---
(program
(command (command_name) (quoted_argument
(command (command_name) (string
(command_substitution (command (command_name)))
(command_substitution (command (command_name)))))
(command (command_name) (single_quoted_argument)))
(command (command_name) (raw_string)))
===============================
Quoted command names
@ -46,7 +46,7 @@ Quoted command names
---
(program
(command (quoted_argument (expansion (variable_name)) (expansion (variable_name))) (argument)))
(command (string (simple_expansion (variable_name)) (simple_expansion (variable_name))) (word)))
===============================
Commands with numeric arguments
@ -57,7 +57,7 @@ exit 1
---
(program
(command (command_name) (argument)))
(command (command_name) (word)))
===================================
Commands with environment variables
@ -70,14 +70,14 @@ VAR1=a VAR2="ok" git diff --word-diff=color
(program
(command
(environment_variable_assignment (variable_name) (argument))
(environment_variable_assignment (variable_name) (word))
(command_name))
(command
(environment_variable_assignment (variable_name) (argument))
(environment_variable_assignment (variable_name) (quoted_argument))
(environment_variable_assignment (variable_name) (word))
(environment_variable_assignment (variable_name) (string))
(command_name)
(argument)
(argument)))
(word)
(word)))
===================================
Empty environment variables
@ -106,8 +106,8 @@ cat foo | grep -v bar
(command (command_name))
(command (command_name)))
(pipeline
(command (command_name) (argument))
(command (command_name) (argument) (argument))))
(command (command_name) (word))
(command (command_name) (word) (word))))
===================================
Lists
@ -126,8 +126,8 @@ a | b && c && d; d e f || e g
(command (command_name)))
(command (command_name)))
(list
(command (command_name) (argument) (argument))
(command (command_name) (argument))))
(command (command_name) (word) (word))
(command (command_name) (word))))
===============================
File redirects
@ -142,44 +142,16 @@ cat a b > /dev/null
(program
(command
(command_name)
(file_redirect (argument)))
(file_redirect (word)))
(command
(command_name)
(argument)
(argument)
(file_redirect (argument)))
(word)
(word)
(file_redirect (word)))
(command
(file_redirect (file_descriptor) (argument))
(file_redirect (file_descriptor) (word))
(command_name)))
===============================
Variable expansions
===============================
cat $FOO
---
(program
(command
(command_name)
(expansion (variable_name))))
===============================
Variable expansion operators
===============================
cat ${BAR} ${ABC=def} ${GHI:?jkl}
---
(program
(command
(command_name)
(operator_expansion (variable_name))
(operator_expansion (variable_name) (argument))
(operator_expansion (variable_name) (argument))))
===============================
Heredoc redirects
===============================
@ -200,7 +172,7 @@ JS
(heredoc_redirect (heredoc)))
(command
(command_name)
(argument)
(word)
(heredoc_redirect (heredoc))))
===============================
@ -219,6 +191,6 @@ exit
(command
(command_name)
(heredoc_redirect (heredoc
(expansion (variable_name))
(operator_expansion (variable_name)))))
(simple_expansion (variable_name))
(expansion (variable_name)))))
(command (command_name)))

@ -11,10 +11,10 @@ done
(program
(while_statement
(command (command_name) (argument))
(command (command_name) (word))
(do_group
(command (command_name) (argument))
(command (command_name) (argument)))))
(command (command_name) (word))
(command (command_name) (word)))))
====================================
For statements
@ -28,10 +28,10 @@ done
(program
(for_statement
(argument)
(command (command_substitution (command (command_name) (argument) (argument))))
(word)
(command (command_substitution (command (command_name) (word) (word))))
(do_group
(command (command_name) (expansion (variable_name))))))
(command (command_name) (simple_expansion (variable_name))))))
====================================
If statements
@ -50,14 +50,14 @@ fi
(program
(if_statement
(pipeline
(command (command_name) (argument))
(command (command_name) (argument) (argument)))
(command (command_name) (argument))
(command (command_name) (word))
(command (command_name) (word) (word)))
(command (command_name) (word))
(elif_clause
(pipeline
(command (command_name) (argument))
(command (command_name) (argument) (argument)))
(command (command_name) (argument)))
(command (command_name) (word))
(command (command_name) (word) (word)))
(command (command_name) (word)))
(else_clause
(command (command_name)))))
@ -74,10 +74,10 @@ fi
(program
(if_statement
(bracket_command
(quoted_argument (command_substitution (command (command_name))))
(argument)
(single_quoted_argument))
(command (command_name) (argument))))
(string (command_substitution (command (command_name))))
(word)
(raw_string))
(command (command_name) (word))))
====================================
Case statements
@ -96,11 +96,11 @@ esac
---
(program
(case_statement (quoted_argument)
(case_item (argument)
(command (command_name) (argument)))
(case_item (argument)
(command (command_name) (argument)))))
(case_statement (string)
(case_item (word)
(command (command_name) (word)))
(case_item (word)
(command (command_name) (word)))))
===============================
Subshells
@ -113,7 +113,7 @@ Subshells
---
(program
(subshell (command (command_name) (argument))))
(subshell (command (command_name) (word))))
===============================
Function definitions
@ -130,5 +130,5 @@ function do_something_else() {
---
(program
(function_definition (command_name) (compound_command (command (command_name) (argument))))
(function_definition (command_name) (compound_command (command (command_name) (argument)))))
(function_definition (command_name) (compound_statement (command (command_name) (word))))
(function_definition (command_name) (compound_statement (command (command_name) (word)))))

100
corpus/expressions.txt Normal file

@ -0,0 +1,100 @@
=============================
Literal words
=============================
echo a
echo a b
---
(program
(command (command_name) (word))
(command (command_name) (word) (word)))
=============================
Simple variable expansions
=============================
echo $abc
---
(program
(command (command_name) (simple_expansion (variable_name))))
=============================
Variable expansions
=============================
echo ${abc}
echo ${abc:-def}
---
(program
(command (command_name) (expansion (variable_name)))
(command (command_name) (expansion (variable_name) (word))))
===================================
Other variable expansion operators
===================================
cat ${BAR} ${ABC=def} ${GHI:?jkl}
---
(program
(command
(command_name)
(expansion (variable_name))
(expansion (variable_name) (word))
(expansion (variable_name) (word))))
=============================
Command substitutions
=============================
echo `echo hi`
echo $(echo $(echo hi))
---
(program
(command
(command_name)
(command_substitution (command (command_name) (word))))
(command
(command_name)
(command_substitution (command
(command_name)
(command_substitution (command
(command_name)
(word)))))))
=============================
Single quoted strings
=============================
echo 'a b' 'c d'
---
(program
(command (command_name) (raw_string) (raw_string)))
=============================
Double quoted strings
=============================
echo "a" "b"
echo "a ${b} c" "d $e"
---
(program
(command (command_name)
(string)
(string))
(command (command_name)
(string (expansion (variable_name)))
(string (simple_expansion (variable_name)))))

@ -29,14 +29,14 @@ f=g \
(program
(command
(command_name)
(argument)
(argument))
(word)
(word))
(command
(environment_variable_assignment
(variable_name)
(argument))
(word))
(environment_variable_assignment
(variable_name)
(argument))
(word))
(command_name)
(argument)))
(word)))

@ -1,7 +1,12 @@
module.exports = grammar({
name: 'bash',
inline: $ => [$.statement, $.terminator, $.value],
inline: $ => [
$.statement,
$.terminator,
$.expression,
$._variable_name
],
externals: $ => [
$._simple_heredoc,
@ -25,6 +30,8 @@ module.exports = grammar({
$.terminator
),
// Statements
statement: $ => choice(
$.environment_variable_assignment,
$.command,
@ -41,7 +48,7 @@ module.exports = grammar({
for_statement: $ => seq(
'for',
rename($.word, 'argument'),
$.word,
'in',
$._terminated_statement,
$.do_group
@ -83,7 +90,7 @@ module.exports = grammar({
case_statement: $ => seq(
'case',
$.value,
$.expression,
optional($.terminator),
'in',
$.terminator,
@ -92,7 +99,7 @@ module.exports = grammar({
),
case_item: $ => seq(
$.value,
$.expression,
')',
repeat($._terminated_statement),
';;'
@ -103,40 +110,19 @@ module.exports = grammar({
rename($.leading_word, 'command_name'),
'(',
')',
$.compound_command
$.compound_statement
),
compound_command: $ => seq(
compound_statement: $ => seq(
'{',
repeat($._terminated_statement),
'}'
),
bracket_command: $ => choice(
seq('[', repeat1($.value), ']'),
seq('[[', repeat1($.value), ']]')
),
command: $ => seq(
repeat(choice(
$.environment_variable_assignment,
$.file_redirect
)),
choice(
rename(choice($.leading_word), 'command_name'),
':',
$.quoted_argument,
$.single_quoted_argument,
$.command_substitution
),
optional(seq(
/\s+/,
repeat($.value)
)),
repeat(choice(
$.file_redirect,
$.heredoc_redirect
))
// A parenthesized group: '(' followed by zero or more terminated statements
// and a closing ')'.
subshell: $ => seq(
'(',
repeat($._terminated_statement),
')'
),
pipeline: $ => prec.left(1, seq(
@ -151,73 +137,48 @@ module.exports = grammar({
$.statement
)),
subshell: $ => seq(
'(',
repeat($._terminated_statement),
')'
// A test written with brackets, in either the single-bracket or the
// double-bracket form. Each form requires at least one expression between
// the opening and closing brackets.
bracket_command: $ => choice(
seq('[', repeat1($.expression), ']'),
seq('[[', repeat1($.expression), ']]')
),
// Commands
// A simple command, declared left-associative. In order it consists of:
//   1. any number of environment variable assignments and file redirects,
//   2. the command name,
//   3. optional whitespace-separated argument expressions,
//   4. any number of trailing file redirects and heredoc redirects.
command: $ => prec.left(seq(
repeat(choice(
$.environment_variable_assignment,
$.file_redirect
)),
// The name is a leading_word (exposed in the tree as `command_name`), a
// literal ':', a double-quoted string, a raw string, or a command
// substitution.
choice(
rename(choice($.leading_word), 'command_name'),
':',
$.string,
$.raw_string,
$.command_substitution
),
// Arguments are only accepted after explicit whitespace following the name.
optional(seq(
/\s+/,
repeat($.expression)
)),
repeat(choice(
$.file_redirect,
$.heredoc_redirect
))
)),
environment_variable_assignment: $ => seq(
rename($.leading_word, 'variable_name'),
'=',
choice(
$.value,
$.expression,
$._empty_value
)
),
value: $ => choice(
rename($.word, 'argument'),
$.command_substitution,
$.quoted_argument,
$.single_quoted_argument,
$.expansion,
$.operator_expansion
),
quoted_argument: $ => seq(
'"',
repeat(choice(
$._quoted_chars,
$.expansion,
$.operator_expansion,
$.command_substitution
)),
'"'
),
_quoted_chars: $ => /[^"$]+/,
single_quoted_argument: $ => /'[^']*'/,
expansion: $ => seq(
'$',
choice(
rename($.word, 'variable_name'),
'$'
)
),
operator_expansion: $ => seq(
'${',
rename($.leading_word, 'variable_name'),
optional(seq(
choice(':', ':?', '=', ':-'),
$.value
)),
'}'
),
command_substitution: $ => seq(
'$(',
$.command,
')'
),
file_redirect: $ => seq(
optional($.file_descriptor),
choice('<', '>', '>>', '&>', '&>>', '<&', '>&'),
$.value
$.expression
),
heredoc_redirect: $ => seq(
@ -231,19 +192,75 @@ module.exports = grammar({
$._heredoc_beginning,
repeat(choice(
$.expansion,
$.operator_expansion,
$.simple_expansion,
$._heredoc_middle
)),
$._heredoc_end
)
),
leading_word: $ => /[^"\\\s#=|;:{}()]+/,
// Expressions
word: $ => /[^"#\\\s$<>{}&;()]+/,
// Any single value: a bare word, a double-quoted string, a raw
// (single-quoted) string, a braced or unbraced variable expansion, or a
// command substitution. This rule is listed under `inline` in this grammar,
// and the expected trees in the corpus show its alternatives directly
// (e.g. `(word)`, `(string)`) rather than an `expression` node.
expression: $ => choice(
$.word,
$.string,
$.raw_string,
$.expansion,
$.simple_expansion,
$.command_substitution
),
// A double-quoted string. Between the quotes it accepts any mix of literal
// text (runs of characters other than a double quote or a dollar sign),
// braced expansions, simple expansions and command substitutions.
string: $ => seq(
'"',
repeat(choice(
/[^"$]+/,
$.expansion,
$.simple_expansion,
$.command_substitution
)),
'"'
),
// A single-quoted string matched as one token: an opening single quote, any
// characters that are not a single quote, and a closing single quote.
raw_string: $ => /'[^']*'/,
// An unbraced variable expansion: a dollar sign followed by either a
// simple_variable_name (exposed in the tree as `variable_name`) or a
// special_variable_name.
simple_expansion: $ => seq(
'$',
choice(
rename($.simple_variable_name, 'variable_name'),
$.special_variable_name
)
),
// A braced variable expansion: '${', a variable name, an optional operator
// with its operand expression, and a closing '}'.
// NOTE(review): only the operators ':', ':?', '=' and ':-' are accepted here;
// bash defines further ones (e.g. ':=', ':+') -- confirm whether omitting
// them is intentional.
expansion: $ => seq(
'${',
$._variable_name,
optional(seq(
choice(':', ':?', '=', ':-'),
$.expression
)),
'}'
),
// The name inside a braced expansion: either a leading_word (exposed in the
// tree as `variable_name`) or a special_variable_name. This rule is listed
// under `inline` in this grammar.
_variable_name: $ => choice(
rename($.leading_word, 'variable_name'),
$.special_variable_name
),
// A command substitution in either the dollar-parenthesis form or the
// backtick form. Both forms wrap exactly one `command`.
command_substitution: $ => choice(
seq('$(', $.command, ')'),
seq('`', $.command, '`')
),
// Token used where a command name or variable name is expected: one or more
// characters, none of which is a backtick, double quote, backslash,
// whitespace, or one of  # = | ; : { } ( )
leading_word: $ => /[^`"\\\s#=|;:{}()]+/,
// A bare (unquoted) word: one or more characters, none of which is a double
// quote, backtick, backslash, whitespace, or one of  # $ < > { } & ; ( )
word: $ => /[^"`#\\\s$<>{}&;()]+/,
comment: $ => /#.*/,
// Variable name allowed after a bare dollar sign: one or more regex word
// characters (\w).
simple_variable_name: $ => /\w+/,
// Single-character names accepted in place of an ordinary variable name by
// both simple_expansion and _variable_name.
special_variable_name: $ => choice('*', '@', '#', '?', '-', '$', '!', '0', '_'),
terminator: $ => choice(';', ';;', '\n', '&'),
}
});

337
src/grammar.json vendored

@ -78,12 +78,8 @@
"value": "for"
},
{
"type": "RENAME",
"content": {
"type": "SYMBOL",
"name": "word"
},
"value": "argument"
"type": "SYMBOL",
"name": "word"
},
{
"type": "STRING",
@ -232,7 +228,7 @@
},
{
"type": "SYMBOL",
"name": "value"
"name": "expression"
},
{
"type": "CHOICE",
@ -272,7 +268,7 @@
"members": [
{
"type": "SYMBOL",
"name": "value"
"name": "expression"
},
{
"type": "STRING",
@ -324,11 +320,11 @@
},
{
"type": "SYMBOL",
"name": "compound_command"
"name": "compound_statement"
}
]
},
"compound_command": {
"compound_statement": {
"type": "SEQ",
"members": [
{
@ -362,7 +358,7 @@
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "value"
"name": "expression"
}
},
{
@ -382,7 +378,7 @@
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "value"
"name": "expression"
}
},
{
@ -394,99 +390,103 @@
]
},
"command": {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "environment_variable_assignment"
},
{
"type": "SYMBOL",
"name": "file_redirect"
}
]
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "environment_variable_assignment"
"type": "RENAME",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "leading_word"
}
]
},
"value": "command_name"
},
{
"type": "STRING",
"value": ":"
},
{
"type": "SYMBOL",
"name": "file_redirect"
"name": "string"
},
{
"type": "SYMBOL",
"name": "raw_string"
},
{
"type": "SYMBOL",
"name": "command_substitution"
}
]
}
},
{
"type": "CHOICE",
"members": [
{
"type": "RENAME",
"content": {
"type": "CHOICE",
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "leading_word"
"type": "PATTERN",
"value": "\\s+"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "expression"
}
}
]
},
"value": "command_name"
},
{
"type": "STRING",
"value": ":"
},
{
"type": "SYMBOL",
"name": "quoted_argument"
},
{
"type": "SYMBOL",
"name": "single_quoted_argument"
},
{
"type": "SYMBOL",
"name": "command_substitution"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "PATTERN",
"value": "\\s+"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "value"
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "file_redirect"
},
{
"type": "SYMBOL",
"name": "heredoc_redirect"
"type": "BLANK"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "file_redirect"
},
{
"type": "SYMBOL",
"name": "heredoc_redirect"
}
]
}
}
}
]
]
}
},
"pipeline": {
"type": "PREC_LEFT",
@ -588,7 +588,7 @@
"members": [
{
"type": "SYMBOL",
"name": "value"
"name": "expression"
},
{
"type": "SYMBOL",
@ -598,28 +598,20 @@
}
]
},
"value": {
"expression": {
"type": "CHOICE",
"members": [
{
"type": "RENAME",
"content": {
"type": "SYMBOL",
"name": "word"
},
"value": "argument"
"type": "SYMBOL",
"name": "word"
},
{
"type": "SYMBOL",
"name": "command_substitution"
"name": "string"
},
{
"type": "SYMBOL",
"name": "quoted_argument"
},
{
"type": "SYMBOL",
"name": "single_quoted_argument"
"name": "raw_string"
},
{
"type": "SYMBOL",
@ -627,11 +619,15 @@
},
{
"type": "SYMBOL",
"name": "operator_expansion"
"name": "simple_expansion"
},
{
"type": "SYMBOL",
"name": "command_substitution"
}
]
},
"quoted_argument": {
"string": {
"type": "SEQ",
"members": [
{
@ -653,7 +649,7 @@
},
{
"type": "SYMBOL",
"name": "operator_expansion"
"name": "simple_expansion"
},
{
"type": "SYMBOL",
@ -672,11 +668,11 @@
"type": "PATTERN",
"value": "[^\"$]+"
},
"single_quoted_argument": {
"raw_string": {
"type": "PATTERN",
"value": "'[^']*'"
},
"expansion": {
"simple_expansion": {
"type": "SEQ",
"members": [
{
@ -690,19 +686,19 @@
"type": "RENAME",
"content": {
"type": "SYMBOL",
"name": "word"
"name": "simple_variable_name"
},
"value": "variable_name"
},
{
"type": "STRING",
"value": "$"
"type": "SYMBOL",
"name": "special_variable_name"
}
]
}
]
},
"operator_expansion": {
"expansion": {
"type": "SEQ",
"members": [
{
@ -710,12 +706,8 @@
"value": "${"
},
{
"type": "RENAME",
"content": {
"type": "SYMBOL",
"name": "leading_word"
},
"value": "variable_name"
"type": "SYMBOL",
"name": "_variable_name"
},
{
"type": "CHOICE",
@ -746,7 +738,7 @@
},
{
"type": "SYMBOL",
"name": "value"
"name": "expression"
}
]
},
@ -761,20 +753,59 @@
}
]
},
"command_substitution": {
"type": "SEQ",
"_variable_name": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "$("
"type": "RENAME",
"content": {
"type": "SYMBOL",
"name": "leading_word"
},
"value": "variable_name"
},
{
"type": "SYMBOL",
"name": "command"
"name": "special_variable_name"
}
]
},
"command_substitution": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "$("
},
{
"type": "SYMBOL",
"name": "command"
},
{
"type": "STRING",
"value": ")"
}
]
},
{
"type": "STRING",
"value": ")"
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "`"
},
{
"type": "SYMBOL",
"name": "command"
},
{
"type": "STRING",
"value": "`"
}
]
}
]
},
@ -828,7 +859,7 @@
},
{
"type": "SYMBOL",
"name": "value"
"name": "expression"
}
]
},
@ -879,7 +910,7 @@
},
{
"type": "SYMBOL",
"name": "operator_expansion"
"name": "simple_expansion"
},
{
"type": "SYMBOL",
@ -898,16 +929,61 @@
},
"leading_word": {
"type": "PATTERN",
"value": "[^\"\\\\\\s#=|;:{}()]+"
"value": "[^`\"\\\\\\s#=|;:{}()]+"
},
"word": {
"type": "PATTERN",
"value": "[^\"#\\\\\\s$<>{}&;()]+"
"value": "[^\"`#\\\\\\s$<>{}&;()]+"
},
"comment": {
"type": "PATTERN",
"value": "#.*"
},
"simple_variable_name": {
"type": "PATTERN",
"value": "\\w+"
},
"special_variable_name": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "*"
},
{
"type": "STRING",
"value": "@"
},
{
"type": "STRING",
"value": "#"
},
{
"type": "STRING",
"value": "?"
},
{
"type": "STRING",
"value": "-"
},
{
"type": "STRING",
"value": "$"
},
{
"type": "STRING",
"value": "!"
},
{
"type": "STRING",
"value": "0"
},
{
"type": "STRING",
"value": "_"
}
]
},
"terminator": {
"type": "CHOICE",
"members": [
@ -982,6 +1058,7 @@
"inline": [
"statement",
"terminator",
"value"
"expression",
"_variable_name"
]
}

30914
src/parser.c vendored

File diff suppressed because it is too large Load Diff

10
src/scanner.cc vendored

@ -36,7 +36,7 @@ struct Scanner {
current_leading_word += lexer->lookahead;
advance(lexer);
}
return current_leading_word == heredoc_identifier;
return current_leading_word == heredoc_delimiter;
}
bool scan_heredoc_content(TSLexer *lexer, TokenType middle_type, TokenType end_type) {
@ -74,14 +74,14 @@ struct Scanner {
}
bool scan(TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[HEREDOC_MIDDLE]) {
if (valid_symbols[HEREDOC_MIDDLE] && !heredoc_delimiter.empty()) {
return scan_heredoc_content(lexer, HEREDOC_MIDDLE, HEREDOC_END);
}
if (valid_symbols[HEREDOC_BEGINNING]) {
heredoc_identifier.clear();
heredoc_delimiter.clear();
while (iswalpha(lexer->lookahead)) {
heredoc_identifier += lexer->lookahead;
heredoc_delimiter += lexer->lookahead;
advance(lexer);
}
@ -118,7 +118,7 @@ struct Scanner {
return false;
}
wstring heredoc_identifier;
wstring heredoc_delimiter;
wstring current_leading_word;
};