commit ab1d553e1d5f3b733819c8a4a4597de5fc4d4b57 Author: Max Brunsfeld Date: Fri Jul 14 12:28:54 2017 -0700 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..01a5e93 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules +build +*.log diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..3c42c29 --- /dev/null +++ b/.npmignore @@ -0,0 +1,3 @@ +corpus +build +examples diff --git a/binding.gyp b/binding.gyp new file mode 100644 index 0000000..b4ff008 --- /dev/null +++ b/binding.gyp @@ -0,0 +1,18 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_bash_binding", + "include_dirs": [ + " [$.control_operator], + + rules: { + program: $ => repeat($.command), + + command: $ => seq( + choice( + $.simple_command, + $.pipeline, + $.list + ), + $.control_operator + ), + + simple_command: $ => seq( + repeat($.environment_variable_assignment), + rename($.leading_word, 'command_name'), + optional(seq( + /\s+/, + repeat(rename($.word, 'argument')) + )) + ), + + pipeline: $ => prec.left(seq( + $.simple_command, + choice('|', '|&'), + $.simple_command + )), + + list: $ => prec.left(seq( + choice( + $.simple_command, + $.list, + $.pipeline + ), + choice('&&', ';'), + choice( + $.simple_command, + $.pipeline + ) + )), + + environment_variable_assignment: $ => seq( + rename($.leading_word, 'variable_name'), + '=', + rename($.word, 'argument') + ), + + leading_word: $ => /[^\s=|;]+/, + + word: $ => /[^\s]+/, + + control_operator: $ => choice( + '\n', + ';;' + ) + } +}); diff --git a/index.js b/index.js new file mode 100644 index 0000000..e1ce768 --- /dev/null +++ b/index.js @@ -0,0 +1 @@ +module.exports = require("./build/Release/tree_sitter_bash_binding"); diff --git a/package.json b/package.json new file mode 100644 index 0000000..514097f --- /dev/null +++ b/package.json @@ -0,0 +1,22 @@ +{ + "name": "tree-sitter-bash", + "version": "0.2.1", + "description": "Bash grammar for tree-sitter", + "main": "index.js", + "keywords": [ + "parser", + "lexer" + ], + "author": "Max Brunsfeld", + "license": "MIT", + "dependencies": { + "nan": "^2.4.0" + }, + "devDependencies": { + "tree-sitter-cli": "^0.5.3" + }, + "scripts": { + "build": "tree-sitter generate && node-gyp build", + "test": "tree-sitter test && tree-sitter parse examples/* --quiet --time" + } +} diff --git a/src/binding.cc b/src/binding.cc new file mode 100644 index 0000000..91fd31b --- /dev/null +++ b/src/binding.cc @@ -0,0 +1,28 @@ +#include "tree_sitter/parser.h" +#include +#include "nan.h" + +using namespace v8; + +extern "C" TSLanguage * tree_sitter_bash(); + +namespace { + +NAN_METHOD(New) {} + +void Init(Handle exports, Handle module) { + Local tpl = Nan::New(New); + tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + tpl->InstanceTemplate()->SetInternalFieldCount(1); + + Local constructor = tpl->GetFunction(); + Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(instance, 0, tree_sitter_bash()); + + instance->Set(Nan::New("name").ToLocalChecked(), Nan::New("bash").ToLocalChecked()); + module->Set(Nan::New("exports").ToLocalChecked(), instance); +} + +NODE_MODULE(tree_sitter_bash_binding, Init) + +} // namespace diff --git a/src/grammar.json b/src/grammar.json new file mode 100644 index 0000000..04ef06a --- /dev/null +++ b/src/grammar.json @@ -0,0 +1,225 @@ +{ + "name": "bash", + "rules": { + "program": { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "command" + } + }, + "command": { + "type": "SEQ", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "simple_command" + }, + { + "type": "SYMBOL", + "name": "pipeline" + }, + { + "type": "SYMBOL", + "name": "list" + } + ] + }, + { + "type": "SYMBOL", + "name": "control_operator" + } + ] + }, + "simple_command": { + "type": "SEQ", + "members": [ + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "environment_variable_assignment" + } + }, + { + "type": "RENAME", + "content": { + "type": "SYMBOL", + "name": "leading_word" + }, + "value": "command_name" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "PATTERN", + "value": "\\s+" + }, + { + "type": "REPEAT", + "content": { + "type": "RENAME", + "content": { + "type": "SYMBOL", + "name": "word" + }, + "value": "argument" + } + } + ] + }, + { + "type": "BLANK" + } + ] + } + ] + }, + "pipeline": { + "type": "PREC_LEFT", + "value": 0, + "content": { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "simple_command" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "|" + }, + { + "type": "STRING", + "value": "|&" + } + ] + }, + { + "type": "SYMBOL", + "name": "simple_command" + } + ] + } + }, + "list": { + "type": "PREC_LEFT", + "value": 0, + "content": { + "type": "SEQ", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "simple_command" + }, + { + "type": "SYMBOL", + "name": "list" + }, + { + "type": "SYMBOL", + "name": "pipeline" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "&&" + }, + { + "type": "STRING", + "value": ";" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "simple_command" + }, + { + "type": "SYMBOL", + "name": "pipeline" + } + ] + } + ] + } + }, + "environment_variable_assignment": { + "type": "SEQ", + "members": [ + { + "type": "RENAME", + "content": { + "type": "SYMBOL", + "name": "leading_word" + }, + "value": "variable_name" + }, + { + "type": "STRING", + "value": "=" + }, + { + "type": "RENAME", + "content": { + "type": "SYMBOL", + "name": "word" + }, + "value": "argument" + } + ] + }, + "leading_word": { + "type": "PATTERN", + "value": "[^\\s=|;]+" + }, + "word": { + "type": "PATTERN", + "value": "[^\\s]+" + }, + "control_operator": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "\n" + }, + { + "type": "STRING", + "value": ";;" + } + ] + } + }, + "extras": [ + { + "type": "PATTERN", + "value": "\\s" + } + ], + "conflicts": [], + "externals": [], + "inline": [ + "control_operator" + ] +} \ No newline at end of file diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..c2ac892 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,815 @@ +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + +#define LANGUAGE_VERSION 3 +#define STATE_COUNT 33 +#define SYMBOL_COUNT 20 +#define TOKEN_COUNT 11 +#define EXTERNAL_TOKEN_COUNT 0 +#define MAX_RENAME_SEQUENCE_LENGTH 4 + +enum { + aux_sym_SLASH_BSLASHs_PLUS_SLASH = 1, + anon_sym_PIPE = 2, + anon_sym_PIPE_AMP = 3, + anon_sym_AMP_AMP = 4, + anon_sym_SEMI = 5, + anon_sym_EQ = 6, + sym_leading_word = 7, + sym_word = 8, + anon_sym_LF = 9, + anon_sym_SEMI_SEMI = 10, + sym_program = 11, + sym_command = 12, + sym_simple_command = 13, + sym_pipeline = 14, + sym_list = 15, + sym_environment_variable_assignment = 16, + aux_sym_program_repeat1 = 17, + aux_sym_simple_command_repeat1 = 18, + aux_sym_simple_command_repeat2 = 19, + rename_sym_1 = 20, + rename_sym_argument = 21, + rename_sym_command_name = 22, + rename_sym_variable_name = 23, +}; + +static const char *ts_symbol_names[] = { + [ts_builtin_sym_end] = "END", + [aux_sym_SLASH_BSLASHs_PLUS_SLASH] = "/\\s+/", + [anon_sym_PIPE] = "|", + [anon_sym_PIPE_AMP] = "|&", + [anon_sym_AMP_AMP] = "&&", + [anon_sym_SEMI] = ";", + [anon_sym_EQ] = "=", + [sym_leading_word] = "leading_word", + [sym_word] = "word", + [anon_sym_LF] = "\n", + [anon_sym_SEMI_SEMI] = ";;", + [sym_program] = "program", + [sym_command] = "command", + [sym_simple_command] = "simple_command", + [sym_pipeline] = "pipeline", + [sym_list] = "list", + [sym_environment_variable_assignment] = "environment_variable_assignment", + [aux_sym_program_repeat1] = "program_repeat1", + [aux_sym_simple_command_repeat1] = "simple_command_repeat1", + [aux_sym_simple_command_repeat2] = "simple_command_repeat2", + [rename_sym_1] = "", + [rename_sym_argument] = "argument", + [rename_sym_command_name] = "command_name", + [rename_sym_variable_name] = "variable_name", +}; + +static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + .structural = true, + .extra = false, + }, + [aux_sym_SLASH_BSLASHs_PLUS_SLASH] = { + .visible = false, + .named = false, + .structural = true, + .extra = false, + }, + [anon_sym_PIPE] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [anon_sym_PIPE_AMP] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [anon_sym_AMP_AMP] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [anon_sym_SEMI] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [anon_sym_EQ] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [sym_leading_word] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [sym_word] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [anon_sym_LF] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [anon_sym_SEMI_SEMI] = { + .visible = true, + .named = false, + .structural = true, + .extra = false, + }, + [sym_program] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [sym_command] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [sym_simple_command] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [sym_pipeline] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [sym_list] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [sym_environment_variable_assignment] = { + .visible = true, + .named = true, + .structural = true, + .extra = false, + }, + [aux_sym_program_repeat1] = { + .visible = false, + .named = false, + .structural = true, + .extra = false, + }, + [aux_sym_simple_command_repeat1] = { + .visible = false, + .named = false, + .structural = true, + .extra = false, + }, + [aux_sym_simple_command_repeat2] = { + .visible = false, + .named = false, + .structural = true, + .extra = false, + }, +}; + +static TSSymbol ts_rename_sequences[10][MAX_RENAME_SEQUENCE_LENGTH] = { + [1] = { + [0] = rename_sym_command_name, + }, + [2] = { + [0] = rename_sym_command_name, + }, + [3] = { + [1] = rename_sym_command_name, + }, + [4] = { + [0] = rename_sym_argument, + }, + [5] = { + [0] = rename_sym_command_name, + }, + [6] = { + [0] = rename_sym_variable_name, + [2] = rename_sym_argument, + }, + [7] = { + [1] = rename_sym_command_name, + }, + [8] = { + [1] = rename_sym_argument, + }, + [9] = { + [1] = rename_sym_command_name, + }, +}; + +static bool ts_lex(TSLexer *lexer, TSStateId state) { + START_LEXER(); + switch (state) { + case 0: + if (lookahead == 0) + ADVANCE(1); + if (lookahead == '&') + ADVANCE(2); + if (lookahead == ';') + ADVANCE(4); + if (lookahead == '=') + ADVANCE(6); + if (lookahead == '|') + ADVANCE(7); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(8); + END_STATE(); + case 1: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 2: + if (lookahead == '&') + ADVANCE(3); + END_STATE(); + case 3: + ACCEPT_TOKEN(anon_sym_AMP_AMP); + END_STATE(); + case 4: + ACCEPT_TOKEN(anon_sym_SEMI); + if (lookahead == ';') + ADVANCE(5); + END_STATE(); + case 5: + ACCEPT_TOKEN(anon_sym_SEMI_SEMI); + END_STATE(); + case 6: + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 7: + ACCEPT_TOKEN(anon_sym_PIPE); + END_STATE(); + case 8: + ACCEPT_TOKEN(aux_sym_SLASH_BSLASHs_PLUS_SLASH); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(8); + END_STATE(); + case 9: + if (lookahead == 0) + ADVANCE(1); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(9); + if (lookahead != ';' && + lookahead != '=' && + lookahead != '|') + ADVANCE(10); + END_STATE(); + case 10: + ACCEPT_TOKEN(sym_leading_word); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ' && + lookahead != ';' && + lookahead != '=' && + lookahead != '|') + ADVANCE(10); + END_STATE(); + case 11: + if (lookahead == '\n') + ADVANCE(12); + if (lookahead == '&') + ADVANCE(2); + if (lookahead == ';') + ADVANCE(4); + if (lookahead == '=') + ADVANCE(6); + if (lookahead == '|') + ADVANCE(14); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(13); + END_STATE(); + case 12: + ACCEPT_TOKEN(anon_sym_LF); + if (lookahead == '\n') + ADVANCE(12); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(13); + END_STATE(); + case 13: + ACCEPT_TOKEN(aux_sym_SLASH_BSLASHs_PLUS_SLASH); + if (lookahead == '\n') + ADVANCE(12); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') + ADVANCE(13); + END_STATE(); + case 14: + ACCEPT_TOKEN(anon_sym_PIPE); + if (lookahead == '&') + ADVANCE(15); + END_STATE(); + case 15: + ACCEPT_TOKEN(anon_sym_PIPE_AMP); + END_STATE(); + case 16: + if (lookahead == 0) + ADVANCE(1); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(16); + END_STATE(); + case 17: + if (lookahead == '\n') + ADVANCE(18); + if (lookahead == '&') + ADVANCE(2); + if (lookahead == ';') + ADVANCE(4); + if (lookahead == '|') + ADVANCE(14); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') + SKIP(17); + END_STATE(); + case 18: + ACCEPT_TOKEN(anon_sym_LF); + END_STATE(); + case 19: + if (lookahead == '\n') + ADVANCE(18); + if (lookahead == '&') + ADVANCE(2); + if (lookahead == ';') + ADVANCE(4); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') + SKIP(19); + END_STATE(); + case 20: + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(20); + if (lookahead != 0 && + lookahead != ';' && + lookahead != '=' && + lookahead != '|') + ADVANCE(10); + END_STATE(); + case 21: + if (lookahead == '\n') + ADVANCE(18); + if (lookahead == '&') + ADVANCE(22); + if (lookahead == ';') + ADVANCE(25); + if (lookahead == '|') + ADVANCE(27); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') + SKIP(21); + if (lookahead != 0) + ADVANCE(24); + END_STATE(); + case 22: + ACCEPT_TOKEN(sym_word); + if (lookahead == '&') + ADVANCE(23); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 23: + ACCEPT_TOKEN(anon_sym_AMP_AMP); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 24: + ACCEPT_TOKEN(sym_word); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 25: + ACCEPT_TOKEN(anon_sym_SEMI); + if (lookahead == ';') + ADVANCE(26); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 26: + ACCEPT_TOKEN(anon_sym_SEMI_SEMI); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 27: + ACCEPT_TOKEN(anon_sym_PIPE); + if (lookahead == '&') + ADVANCE(28); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 28: + ACCEPT_TOKEN(anon_sym_PIPE_AMP); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != '\r' && + lookahead != ' ') + ADVANCE(24); + END_STATE(); + case 29: + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') + SKIP(29); + if (lookahead != 0) + ADVANCE(24); + END_STATE(); + default: + return false; + } +} + +static TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 9}, + [2] = {.lex_state = 11}, + [3] = {.lex_state = 16}, + [4] = {.lex_state = 9}, + [5] = {.lex_state = 17}, + [6] = {.lex_state = 19}, + [7] = {.lex_state = 20}, + [8] = {.lex_state = 9}, + [9] = {.lex_state = 20}, + [10] = {.lex_state = 21}, + [11] = {.lex_state = 29}, + [12] = {.lex_state = 20}, + [13] = {.lex_state = 20}, + [14] = {.lex_state = 9}, + [15] = {.lex_state = 9}, + [16] = {.lex_state = 11}, + [17] = {.lex_state = 20}, + [18] = {.lex_state = 21}, + [19] = {.lex_state = 21}, + [20] = {.lex_state = 20}, + [21] = {.lex_state = 19}, + [22] = {.lex_state = 17}, + [23] = {.lex_state = 19}, + [24] = {.lex_state = 21}, + [25] = {.lex_state = 21}, + [26] = {.lex_state = 21}, + [27] = {.lex_state = 21}, + [28] = {.lex_state = 9}, + [29] = {.lex_state = 17}, + [30] = {.lex_state = 19}, + [31] = {.lex_state = 20}, + [32] = {.lex_state = 21}, +}; + +static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = { + [0] = { + [sym_program] = STATE(3), + [sym_command] = STATE(28), + [sym_simple_command] = STATE(29), + [sym_pipeline] = STATE(30), + [sym_list] = STATE(6), + [sym_environment_variable_assignment] = STATE(31), + [aux_sym_program_repeat1] = STATE(8), + [aux_sym_simple_command_repeat1] = STATE(9), + [aux_sym_simple_command_repeat2] = STATE(32), + [ts_builtin_sym_end] = ACTIONS(1), + [aux_sym_SLASH_BSLASHs_PLUS_SLASH] = ACTIONS(3), + [anon_sym_PIPE] = ACTIONS(5), + [anon_sym_AMP_AMP] = ACTIONS(7), + [anon_sym_SEMI] = ACTIONS(7), + [anon_sym_EQ] = ACTIONS(9), + [anon_sym_SEMI_SEMI] = ACTIONS(11), + }, + [1] = { + [sym_program] = STATE(3), + [sym_command] = STATE(4), + [sym_simple_command] = STATE(5), + [sym_pipeline] = STATE(6), + [sym_list] = STATE(6), + [sym_environment_variable_assignment] = STATE(7), + [aux_sym_program_repeat1] = STATE(8), + [aux_sym_simple_command_repeat1] = STATE(9), + [ts_builtin_sym_end] = ACTIONS(13), + [sym_leading_word] = ACTIONS(15), + }, + [2] = { + [aux_sym_SLASH_BSLASHs_PLUS_SLASH] = ACTIONS(17), + [anon_sym_PIPE] = ACTIONS(19), + [anon_sym_PIPE_AMP] = ACTIONS(19), + [anon_sym_AMP_AMP] = ACTIONS(19), + [anon_sym_SEMI] = ACTIONS(19), + [anon_sym_EQ] = ACTIONS(21), + [anon_sym_LF] = ACTIONS(19), + [anon_sym_SEMI_SEMI] = ACTIONS(19), + }, + [3] = { + [ts_builtin_sym_end] = ACTIONS(23), + }, + [4] = { + [ts_builtin_sym_end] = ACTIONS(25), + [sym_leading_word] = ACTIONS(27), + }, + [5] = { + [anon_sym_PIPE] = ACTIONS(29), + [anon_sym_PIPE_AMP] = ACTIONS(29), + [anon_sym_AMP_AMP] = ACTIONS(31), + [anon_sym_SEMI] = ACTIONS(31), + [anon_sym_LF] = ACTIONS(33), + [anon_sym_SEMI_SEMI] = ACTIONS(33), + }, + [6] = { + [anon_sym_AMP_AMP] = ACTIONS(31), + [anon_sym_SEMI] = ACTIONS(31), + [anon_sym_LF] = ACTIONS(33), + [anon_sym_SEMI_SEMI] = ACTIONS(33), + }, + [7] = { + [sym_leading_word] = ACTIONS(35), + }, + [8] = { + [sym_command] = STATE(15), + [sym_simple_command] = STATE(5), + [sym_pipeline] = STATE(6), + [sym_list] = STATE(6), + [sym_environment_variable_assignment] = STATE(7), + [aux_sym_simple_command_repeat1] = STATE(9), + [ts_builtin_sym_end] = ACTIONS(37), + [sym_leading_word] = ACTIONS(15), + }, + [9] = { + [sym_environment_variable_assignment] = STATE(17), + [sym_leading_word] = ACTIONS(39), + }, + [10] = { + [aux_sym_simple_command_repeat2] = STATE(19), + [anon_sym_PIPE] = ACTIONS(41), + [anon_sym_PIPE_AMP] = ACTIONS(41), + [anon_sym_AMP_AMP] = ACTIONS(41), + [anon_sym_SEMI] = ACTIONS(41), + [sym_word] = ACTIONS(43), + [anon_sym_LF] = ACTIONS(41), + [anon_sym_SEMI_SEMI] = ACTIONS(41), + }, + [11] = { + [sym_word] = ACTIONS(45), + }, + [12] = { + [sym_simple_command] = STATE(21), + [sym_environment_variable_assignment] = STATE(7), + [aux_sym_simple_command_repeat1] = STATE(9), + [sym_leading_word] = ACTIONS(15), + }, + [13] = { + [sym_simple_command] = STATE(22), + [sym_pipeline] = STATE(23), + [sym_environment_variable_assignment] = STATE(7), + [aux_sym_simple_command_repeat1] = STATE(9), + [sym_leading_word] = ACTIONS(15), + }, + [14] = { + [ts_builtin_sym_end] = ACTIONS(47), + [sym_leading_word] = ACTIONS(49), + }, + [15] = { + [ts_builtin_sym_end] = ACTIONS(51), + [sym_leading_word] = ACTIONS(53), + }, + [16] = { + [aux_sym_SLASH_BSLASHs_PLUS_SLASH] = ACTIONS(55), + [anon_sym_PIPE] = ACTIONS(57), + [anon_sym_PIPE_AMP] = ACTIONS(57), + [anon_sym_AMP_AMP] = ACTIONS(57), + [anon_sym_SEMI] = ACTIONS(57), + [anon_sym_EQ] = ACTIONS(21), + [anon_sym_LF] = ACTIONS(57), + [anon_sym_SEMI_SEMI] = ACTIONS(57), + }, + [17] = { + [sym_leading_word] = ACTIONS(59), + }, + [18] = { + [anon_sym_PIPE] = ACTIONS(61), + [anon_sym_PIPE_AMP] = ACTIONS(61), + [anon_sym_AMP_AMP] = ACTIONS(61), + [anon_sym_SEMI] = ACTIONS(61), + [sym_word] = ACTIONS(61), + [anon_sym_LF] = ACTIONS(61), + [anon_sym_SEMI_SEMI] = ACTIONS(61), + }, + [19] = { + [anon_sym_PIPE] = ACTIONS(63), + [anon_sym_PIPE_AMP] = ACTIONS(63), + [anon_sym_AMP_AMP] = ACTIONS(63), + [anon_sym_SEMI] = ACTIONS(63), + [sym_word] = ACTIONS(65), + [anon_sym_LF] = ACTIONS(63), + [anon_sym_SEMI_SEMI] = ACTIONS(63), + }, + [20] = { + [sym_leading_word] = ACTIONS(67), + }, + [21] = { + [anon_sym_AMP_AMP] = ACTIONS(69), + [anon_sym_SEMI] = ACTIONS(69), + [anon_sym_LF] = ACTIONS(69), + [anon_sym_SEMI_SEMI] = ACTIONS(69), + }, + [22] = { + [anon_sym_PIPE] = ACTIONS(29), + [anon_sym_PIPE_AMP] = ACTIONS(29), + [anon_sym_AMP_AMP] = ACTIONS(71), + [anon_sym_SEMI] = ACTIONS(71), + [anon_sym_LF] = ACTIONS(71), + [anon_sym_SEMI_SEMI] = ACTIONS(71), + }, + [23] = { + [anon_sym_AMP_AMP] = ACTIONS(71), + [anon_sym_SEMI] = ACTIONS(71), + [anon_sym_LF] = ACTIONS(71), + [anon_sym_SEMI_SEMI] = ACTIONS(71), + }, + [24] = { + [aux_sym_simple_command_repeat2] = STATE(26), + [anon_sym_PIPE] = ACTIONS(73), + [anon_sym_PIPE_AMP] = ACTIONS(73), + [anon_sym_AMP_AMP] = ACTIONS(73), + [anon_sym_SEMI] = ACTIONS(73), + [sym_word] = ACTIONS(43), + [anon_sym_LF] = ACTIONS(73), + [anon_sym_SEMI_SEMI] = ACTIONS(73), + }, + [25] = { + [anon_sym_PIPE] = ACTIONS(75), + [anon_sym_PIPE_AMP] = ACTIONS(75), + [anon_sym_AMP_AMP] = ACTIONS(75), + [anon_sym_SEMI] = ACTIONS(75), + [sym_word] = ACTIONS(75), + [anon_sym_LF] = ACTIONS(75), + [anon_sym_SEMI_SEMI] = ACTIONS(75), + }, + [26] = { + [anon_sym_PIPE] = ACTIONS(77), + [anon_sym_PIPE_AMP] = ACTIONS(77), + [anon_sym_AMP_AMP] = ACTIONS(77), + [anon_sym_SEMI] = ACTIONS(77), + [sym_word] = ACTIONS(65), + [anon_sym_LF] = ACTIONS(77), + [anon_sym_SEMI_SEMI] = ACTIONS(77), + }, + [27] = { + [aux_sym_simple_command_repeat2] = STATE(32), + [anon_sym_PIPE] = ACTIONS(79), + [anon_sym_PIPE_AMP] = ACTIONS(79), + [anon_sym_AMP_AMP] = ACTIONS(79), + [anon_sym_SEMI] = ACTIONS(79), + [sym_word] = ACTIONS(43), + [anon_sym_LF] = ACTIONS(79), + [anon_sym_SEMI_SEMI] = ACTIONS(79), + }, + [28] = { + [ts_builtin_sym_end] = ACTIONS(82), + [sym_leading_word] = ACTIONS(85), + }, + [29] = { + [anon_sym_PIPE] = ACTIONS(29), + [anon_sym_PIPE_AMP] = ACTIONS(29), + [anon_sym_AMP_AMP] = ACTIONS(88), + [anon_sym_SEMI] = ACTIONS(88), + [anon_sym_LF] = ACTIONS(92), + [anon_sym_SEMI_SEMI] = ACTIONS(92), + }, + [30] = { + [anon_sym_AMP_AMP] = ACTIONS(96), + [anon_sym_SEMI] = ACTIONS(96), + [anon_sym_LF] = ACTIONS(99), + [anon_sym_SEMI_SEMI] = ACTIONS(99), + }, + [31] = { + [sym_leading_word] = ACTIONS(102), + }, + [32] = { + [anon_sym_PIPE] = ACTIONS(105), + [anon_sym_PIPE_AMP] = ACTIONS(105), + [anon_sym_AMP_AMP] = ACTIONS(105), + [anon_sym_SEMI] = ACTIONS(105), + [sym_word] = ACTIONS(65), + [anon_sym_LF] = ACTIONS(105), + [anon_sym_SEMI_SEMI] = ACTIONS(105), + }, +}; + +static TSParseActionEntry ts_parse_actions[] = { + [0] = {.count = 0, .reusable = false, .depends_on_lookahead = false}, + [1] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, RECOVER(0), + [3] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, RECOVER(27), + [5] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, RECOVER(12), + [7] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, RECOVER(13), + [9] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, RECOVER(11), + [11] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, RECOVER(14), + [13] = {.count = 1, .reusable = true, .depends_on_lookahead = false}, REDUCE(sym_program, 0), + [15] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, SHIFT(2), + [17] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(10), + [19] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 1, .rename_sequence_id = 1), + [21] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(11), + [23] = {.count = 1, .reusable = true, .depends_on_lookahead = false}, ACCEPT_INPUT(), + [25] = {.count = 1, .reusable = true, .depends_on_lookahead = false}, REDUCE(aux_sym_program_repeat1, 1), + [27] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, REDUCE(aux_sym_program_repeat1, 1), + [29] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(12), + [31] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(13), + [33] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(14), + [35] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, REDUCE(aux_sym_simple_command_repeat1, 1), + [37] = {.count = 1, .reusable = true, .depends_on_lookahead = false}, REDUCE(sym_program, 1), + [39] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, SHIFT(16), + [41] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 2, .rename_sequence_id = 2), + [43] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(18), + [45] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, SHIFT(20), + [47] = {.count = 1, .reusable = true, .depends_on_lookahead = false}, REDUCE(sym_command, 2), + [49] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, REDUCE(sym_command, 2), + [51] = {.count = 1, .reusable = true, .depends_on_lookahead = false}, REDUCE(aux_sym_program_repeat1, 2), + [53] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, REDUCE(aux_sym_program_repeat1, 2), + [55] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(24), + [57] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 2, .rename_sequence_id = 3), + [59] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, REDUCE(aux_sym_simple_command_repeat1, 2), + [61] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(aux_sym_simple_command_repeat2, 1, .rename_sequence_id = 4), + [63] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 3, .rename_sequence_id = 5), + [65] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, SHIFT(25), + [67] = {.count = 1, .reusable = true, .depends_on_lookahead = true}, REDUCE(sym_environment_variable_assignment, 3, .rename_sequence_id = 6), + [69] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_pipeline, 3), + [71] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_list, 3), + [73] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 3, .rename_sequence_id = 7), + [75] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(aux_sym_simple_command_repeat2, 2, .rename_sequence_id = 8), + [77] = {.count = 1, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 4, .rename_sequence_id = 9), + [79] = {.count = 2, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 2, .rename_sequence_id = 2), REDUCE(sym_simple_command, 3, .rename_sequence_id = 7), + [82] = {.count = 2, .reusable = true, .depends_on_lookahead = false}, REDUCE(aux_sym_program_repeat1, 1), REDUCE(aux_sym_program_repeat1, 2), + [85] = {.count = 2, .reusable = true, .depends_on_lookahead = true}, REDUCE(aux_sym_program_repeat1, 1), REDUCE(aux_sym_program_repeat1, 2), + [88] = {.count = 3, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_pipeline, 3), REDUCE(sym_list, 3), SHIFT(13), + [92] = {.count = 3, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_pipeline, 3), REDUCE(sym_list, 3), SHIFT(14), + [96] = {.count = 2, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_list, 3), SHIFT(13), + [99] = {.count = 2, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_list, 3), SHIFT(14), + [102] = {.count = 2, .reusable = true, .depends_on_lookahead = true}, REDUCE(aux_sym_simple_command_repeat1, 1), REDUCE(aux_sym_simple_command_repeat1, 2), + [105] = {.count = 2, .reusable = false, .depends_on_lookahead = false}, REDUCE(sym_simple_command, 3, .rename_sequence_id = 5), REDUCE(sym_simple_command, 4, .rename_sequence_id = 9), +}; + +const TSLanguage *tree_sitter_bash() { + GET_LANGUAGE(); +} diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h new file mode 100644 index 0000000..b168210 --- /dev/null +++ b/src/tree_sitter/parser.h @@ -0,0 +1,195 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +typedef uint16_t TSSymbol; +typedef uint16_t TSStateId; +typedef uint8_t TSExternalTokenState[16]; + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 + +typedef struct { + bool visible : 1; + bool named : 1; + bool extra : 1; + bool structural : 1; +} TSSymbolMetadata; + +typedef struct { + void (*advance)(void *, bool); + void (*mark_end)(void *); + int32_t lookahead; + TSSymbol result_symbol; +} TSLexer; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef struct { + union { + struct { + TSStateId to_state; + bool extra : 1; + }; + struct { + TSSymbol symbol; + int16_t dynamic_precedence; + uint8_t child_count; + uint8_t rename_sequence_id : 7; + bool fragile : 1; + }; + }; + TSParseActionType type : 4; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable : 1; + bool depends_on_lookahead : 1; + }; +} TSParseActionEntry; + +typedef struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t token_count; + uint32_t external_token_count; + const char **symbol_names; + const TSSymbolMetadata *symbol_metadata; + const uint16_t *parse_table; + const TSParseActionEntry *parse_actions; + const TSLexMode *lex_modes; + const TSSymbol *rename_sequences; + uint16_t max_rename_sequence_length; + bool (*lex_fn)(TSLexer *, TSStateId); + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(); + void (*destroy)(void *); + void (*reset)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + bool (*serialize)(void *, TSExternalTokenState); + void (*deserialize)(void *, const TSExternalTokenState); + } external_scanner; +} TSLanguage; + +/* + * Lexer Macros + */ + +#define START_LEXER() \ + bool result = false; \ + int32_t lookahead; \ + next_state: \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + lexer->advance(lexer, false); \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + lexer->advance(lexer, true); \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define STATE(id) id +#define ACTIONS(id) id + +#define SHIFT(to_state_value) \ + { \ + { \ + .type = TSParseActionTypeShift, \ + .to_state = to_state_value, \ + } \ + } + +#define RECOVER(to_state_value) \ + { \ + { \ + .type = TSParseActionTypeRecover, \ + .to_state = to_state_value \ + } \ + } + +#define SHIFT_EXTRA() \ + { \ + { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + } + +#define REDUCE(symbol_val, child_count_val, ...) \ + { \ + { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + } \ + } + +#define ACCEPT_INPUT() \ + { \ + { .type = TSParseActionTypeAccept } \ + } + +#define GET_LANGUAGE(...) \ + static TSLanguage language = { \ + .version = LANGUAGE_VERSION, \ + .symbol_count = SYMBOL_COUNT, \ + .token_count = TOKEN_COUNT, \ + .symbol_metadata = ts_symbol_metadata, \ + .parse_table = (const unsigned short *)ts_parse_table, \ + .parse_actions = ts_parse_actions, \ + .lex_modes = ts_lex_modes, \ + .symbol_names = ts_symbol_names, \ + .rename_sequences = (const TSSymbol *)ts_rename_sequences, \ + .max_rename_sequence_length = MAX_RENAME_SEQUENCE_LENGTH, \ + .lex_fn = ts_lex, \ + .external_token_count = EXTERNAL_TOKEN_COUNT, \ + .external_scanner = {__VA_ARGS__} \ + }; \ + return &language \ + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_