2017-07-14 20:54:05 +00:00
|
|
|
#include <tree_sitter/parser.h>

#include <cstdint>
#include <cwctype>
#include <string>
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2017-08-01 17:12:16 +00:00
|
|
|
using std::string;
|
2017-07-14 20:54:05 +00:00
|
|
|
|
|
|
|
// External token kinds produced (or consulted) by Scanner::scan.
//
// Each enumerator is used as an index into the `valid_symbols` array and as
// the value stored in `lexer->result_symbol`, so the numeric order matters.
// NOTE(review): the order presumably has to mirror the `externals` list of
// the grammar -- confirm before reordering or inserting entries.
enum TokenType {
  // A heredoc body that reached its closing delimiter (or a zero lookahead)
  // without an intervening '$'.
  SIMPLE_HEREDOC,

  // First chunk of a heredoc body, cut short at a '$'.
  HEREDOC_BEGINNING,

  // A later chunk of a heredoc body, again cut short at a '$'.
  HEREDOC_MIDDLE,

  // Final chunk of a heredoc body that was previously interrupted.
  HEREDOC_END,

  // An all-digit word immediately followed by '<' or '>'.
  FILE_DESCRIPTOR,

  // Zero-width token emitted when the lookahead is whitespace.
  EMPTY_VALUE,

  // Zero-width token emitted when the lookahead is neither whitespace nor one
  // of the delimiter characters checked in Scanner::scan.
  CONCAT,

  // A word immediately followed by '=', '+=' or '['.
  VARIABLE_NAME,

  // Never emitted by this scanner; only its `valid_symbols` entry is read, to
  // decide whether a '\n' may be skipped as whitespace.
  NEWLINE,
};
|
|
|
|
|
|
|
|
struct Scanner {
|
|
|
|
void skip(TSLexer *lexer) {
|
|
|
|
lexer->advance(lexer, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
void advance(TSLexer *lexer) {
|
|
|
|
lexer->advance(lexer, false);
|
|
|
|
}
|
|
|
|
|
2017-08-01 17:12:16 +00:00
|
|
|
unsigned serialize(char *buffer) {
|
2018-01-19 17:52:45 +00:00
|
|
|
if (heredoc_delimiter.size() >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0;
|
2017-08-01 17:26:42 +00:00
|
|
|
heredoc_delimiter.copy(buffer, heredoc_delimiter.length());
|
2017-08-01 17:12:16 +00:00
|
|
|
return heredoc_delimiter.length();
|
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-08-01 17:12:16 +00:00
|
|
|
void deserialize(const char *buffer, unsigned length) {
|
|
|
|
if (length == 0) heredoc_delimiter.clear();
|
|
|
|
else heredoc_delimiter.assign(buffer, buffer + length);
|
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-07-14 21:27:13 +00:00
|
|
|
bool scan_heredoc_end_identifier(TSLexer *lexer) {
|
|
|
|
current_leading_word.clear();
|
2017-07-14 20:54:05 +00:00
|
|
|
while (iswalpha(lexer->lookahead)) {
|
2017-07-14 21:27:13 +00:00
|
|
|
current_leading_word += lexer->lookahead;
|
2017-07-14 20:54:05 +00:00
|
|
|
advance(lexer);
|
|
|
|
}
|
2017-07-16 05:13:55 +00:00
|
|
|
return current_leading_word == heredoc_delimiter;
|
2017-07-14 21:27:13 +00:00
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-07-14 21:27:13 +00:00
|
|
|
bool scan_heredoc_content(TSLexer *lexer, TokenType middle_type, TokenType end_type) {
|
|
|
|
bool did_advance = false;
|
2017-07-14 20:54:05 +00:00
|
|
|
|
|
|
|
for (;;) {
|
2017-07-14 21:27:13 +00:00
|
|
|
switch (lexer->lookahead) {
|
|
|
|
case '\0': {
|
|
|
|
lexer->result_symbol = end_type;
|
2017-08-01 17:12:16 +00:00
|
|
|
return did_advance;
|
2017-07-14 21:27:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
case '$': {
|
|
|
|
lexer->result_symbol = middle_type;
|
|
|
|
return did_advance;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '\n': {
|
|
|
|
did_advance = true;
|
|
|
|
advance(lexer);
|
|
|
|
if (scan_heredoc_end_identifier(lexer)) {
|
|
|
|
lexer->result_symbol = end_type;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default: {
|
|
|
|
did_advance = true;
|
|
|
|
advance(lexer);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
2017-07-17 17:19:35 +00:00
|
|
|
if (valid_symbols[CONCAT]) {
|
|
|
|
if (!(
|
|
|
|
iswspace(lexer->lookahead) ||
|
|
|
|
lexer->lookahead == '>' ||
|
|
|
|
lexer->lookahead == '<' ||
|
|
|
|
lexer->lookahead == ')' ||
|
|
|
|
lexer->lookahead == '(' ||
|
|
|
|
lexer->lookahead == '[' ||
|
|
|
|
lexer->lookahead == ']' ||
|
|
|
|
lexer->lookahead == '}' ||
|
|
|
|
lexer->lookahead == ';' ||
|
|
|
|
lexer->lookahead == '&' ||
|
|
|
|
lexer->lookahead == '`'
|
|
|
|
)) {
|
|
|
|
lexer->result_symbol = CONCAT;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (valid_symbols[EMPTY_VALUE]) {
|
|
|
|
if (iswspace(lexer->lookahead)) {
|
|
|
|
lexer->result_symbol = EMPTY_VALUE;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-16 05:13:55 +00:00
|
|
|
if (valid_symbols[HEREDOC_MIDDLE] && !heredoc_delimiter.empty()) {
|
2017-07-14 21:27:13 +00:00
|
|
|
return scan_heredoc_content(lexer, HEREDOC_MIDDLE, HEREDOC_END);
|
2017-07-15 00:41:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (valid_symbols[HEREDOC_BEGINNING]) {
|
2017-07-16 05:13:55 +00:00
|
|
|
heredoc_delimiter.clear();
|
2017-07-15 00:14:23 +00:00
|
|
|
while (iswalpha(lexer->lookahead)) {
|
2017-07-16 05:13:55 +00:00
|
|
|
heredoc_delimiter += lexer->lookahead;
|
2017-07-15 00:14:23 +00:00
|
|
|
advance(lexer);
|
|
|
|
}
|
2017-07-14 21:27:13 +00:00
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
if (lexer->lookahead != '\n') return false;
|
2017-07-14 20:54:05 +00:00
|
|
|
advance(lexer);
|
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
if (scan_heredoc_end_identifier(lexer)) {
|
|
|
|
lexer->result_symbol = SIMPLE_HEREDOC;
|
|
|
|
return true;
|
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
return scan_heredoc_content(lexer, HEREDOC_BEGINNING, SIMPLE_HEREDOC);
|
2017-07-15 00:41:14 +00:00
|
|
|
}
|
|
|
|
|
2017-07-17 17:19:35 +00:00
|
|
|
if (valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR]) {
|
|
|
|
for (;;) {
|
|
|
|
if (
|
|
|
|
lexer->lookahead == ' ' ||
|
|
|
|
lexer->lookahead == '\t' ||
|
|
|
|
(lexer->lookahead == '\n' && !valid_symbols[NEWLINE])
|
|
|
|
) {
|
|
|
|
skip(lexer);
|
|
|
|
} else if (lexer->lookahead == '\\') {
|
|
|
|
skip(lexer);
|
|
|
|
if (lexer->lookahead == '\n') {
|
|
|
|
skip(lexer);
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool is_number = true;
|
2017-07-15 00:14:23 +00:00
|
|
|
if (iswdigit(lexer->lookahead)) {
|
|
|
|
advance(lexer);
|
2017-07-17 17:19:35 +00:00
|
|
|
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
|
|
|
is_number = false;
|
|
|
|
advance(lexer);
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
if (iswdigit(lexer->lookahead)) {
|
|
|
|
advance(lexer);
|
|
|
|
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
|
|
|
is_number = false;
|
|
|
|
advance(lexer);
|
|
|
|
} else {
|
|
|
|
break;
|
2017-07-15 00:14:23 +00:00
|
|
|
}
|
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-12-26 22:55:37 +00:00
|
|
|
if (is_number &&
|
|
|
|
valid_symbols[FILE_DESCRIPTOR] &&
|
|
|
|
(lexer->lookahead == '>' || lexer->lookahead == '<')) {
|
2017-07-17 17:19:35 +00:00
|
|
|
lexer->result_symbol = FILE_DESCRIPTOR;
|
2017-07-15 00:41:14 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-12-26 22:55:37 +00:00
|
|
|
if (valid_symbols[VARIABLE_NAME]) {
|
|
|
|
if (lexer->lookahead == '+') {
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '=') {
|
|
|
|
lexer->result_symbol = VARIABLE_NAME;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else if (lexer->lookahead == '=' || lexer->lookahead == '[') {
|
|
|
|
lexer->result_symbol = VARIABLE_NAME;
|
|
|
|
return true;
|
|
|
|
}
|
2017-07-16 06:12:22 +00:00
|
|
|
}
|
2017-07-17 17:19:35 +00:00
|
|
|
|
|
|
|
return false;
|
2017-07-16 06:12:22 +00:00
|
|
|
}
|
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
return false;
|
2017-07-14 20:54:05 +00:00
|
|
|
}
|
2017-07-14 21:27:13 +00:00
|
|
|
|
2017-08-01 17:12:16 +00:00
|
|
|
string heredoc_delimiter;
|
|
|
|
string current_leading_word;
|
2017-07-14 20:54:05 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
extern "C" {
|
|
|
|
|
|
|
|
void *tree_sitter_bash_external_scanner_create() {
|
|
|
|
return new Scanner();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer,
|
|
|
|
const bool *valid_symbols) {
|
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
return scanner->scan(lexer, valid_symbols);
|
|
|
|
}
|
|
|
|
|
2017-08-01 17:12:16 +00:00
|
|
|
unsigned tree_sitter_bash_external_scanner_serialize(void *payload, char *state) {
|
2017-07-14 20:54:05 +00:00
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
return scanner->serialize(state);
|
|
|
|
}
|
|
|
|
|
2017-08-01 17:12:16 +00:00
|
|
|
void tree_sitter_bash_external_scanner_deserialize(void *payload, const char *state, unsigned length) {
|
2017-07-14 20:54:05 +00:00
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
2017-08-01 17:12:16 +00:00
|
|
|
scanner->deserialize(state, length);
|
2017-07-14 20:54:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Destroys the Scanner that tree_sitter_bash_external_scanner_create
// allocated.
void tree_sitter_bash_external_scanner_destroy(void *payload) {
  delete static_cast<Scanner *>(payload);
}
|
|
|
|
|
|
|
|
}
|