#include <tree_sitter/parser.h>

#include <cwctype>
#include <string>

namespace {
|
|
|
|
|
|
|
|
using std::wstring;
|
|
|
|
|
|
|
|
enum TokenType {
|
2017-07-14 21:27:13 +00:00
|
|
|
SIMPLE_HEREDOC,
|
|
|
|
HEREDOC_BEGINNING,
|
|
|
|
HEREDOC_MIDDLE,
|
|
|
|
HEREDOC_END,
|
2017-07-15 00:14:23 +00:00
|
|
|
FILE_DESCRIPTOR,
|
2017-07-15 00:41:14 +00:00
|
|
|
EMPTY_VALUE,
|
2017-07-16 06:12:22 +00:00
|
|
|
LENGTH_OPERATOR
|
2017-07-14 20:54:05 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Scanner {
|
|
|
|
void skip(TSLexer *lexer) {
|
|
|
|
lexer->advance(lexer, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
void advance(TSLexer *lexer) {
|
|
|
|
lexer->advance(lexer, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
void reset() {}
|
|
|
|
|
|
|
|
bool serialize(TSExternalTokenState state) { return true; }
|
|
|
|
|
|
|
|
void deserialize(TSExternalTokenState state) {}
|
|
|
|
|
2017-07-14 21:27:13 +00:00
|
|
|
bool scan_heredoc_end_identifier(TSLexer *lexer) {
|
|
|
|
current_leading_word.clear();
|
2017-07-14 20:54:05 +00:00
|
|
|
while (iswalpha(lexer->lookahead)) {
|
2017-07-14 21:27:13 +00:00
|
|
|
current_leading_word += lexer->lookahead;
|
2017-07-14 20:54:05 +00:00
|
|
|
advance(lexer);
|
|
|
|
}
|
2017-07-16 05:13:55 +00:00
|
|
|
return current_leading_word == heredoc_delimiter;
|
2017-07-14 21:27:13 +00:00
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-07-14 21:27:13 +00:00
|
|
|
bool scan_heredoc_content(TSLexer *lexer, TokenType middle_type, TokenType end_type) {
|
|
|
|
bool did_advance = false;
|
2017-07-14 20:54:05 +00:00
|
|
|
|
|
|
|
for (;;) {
|
2017-07-14 21:27:13 +00:00
|
|
|
switch (lexer->lookahead) {
|
|
|
|
case '\0': {
|
|
|
|
lexer->result_symbol = end_type;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '$': {
|
|
|
|
lexer->result_symbol = middle_type;
|
|
|
|
return did_advance;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '\n': {
|
|
|
|
did_advance = true;
|
|
|
|
advance(lexer);
|
|
|
|
if (scan_heredoc_end_identifier(lexer)) {
|
|
|
|
lexer->result_symbol = end_type;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default: {
|
|
|
|
did_advance = true;
|
|
|
|
advance(lexer);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
2017-07-16 05:13:55 +00:00
|
|
|
if (valid_symbols[HEREDOC_MIDDLE] && !heredoc_delimiter.empty()) {
|
2017-07-14 21:27:13 +00:00
|
|
|
return scan_heredoc_content(lexer, HEREDOC_MIDDLE, HEREDOC_END);
|
2017-07-15 00:41:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (valid_symbols[HEREDOC_BEGINNING]) {
|
2017-07-16 05:13:55 +00:00
|
|
|
heredoc_delimiter.clear();
|
2017-07-15 00:14:23 +00:00
|
|
|
while (iswalpha(lexer->lookahead)) {
|
2017-07-16 05:13:55 +00:00
|
|
|
heredoc_delimiter += lexer->lookahead;
|
2017-07-15 00:14:23 +00:00
|
|
|
advance(lexer);
|
|
|
|
}
|
2017-07-14 21:27:13 +00:00
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
if (lexer->lookahead != '\n') return false;
|
2017-07-14 20:54:05 +00:00
|
|
|
advance(lexer);
|
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
if (scan_heredoc_end_identifier(lexer)) {
|
|
|
|
lexer->result_symbol = SIMPLE_HEREDOC;
|
|
|
|
return true;
|
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
return scan_heredoc_content(lexer, HEREDOC_BEGINNING, SIMPLE_HEREDOC);
|
2017-07-15 00:41:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (valid_symbols[FILE_DESCRIPTOR]) {
|
2017-07-15 00:14:23 +00:00
|
|
|
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') skip(lexer);
|
|
|
|
if (iswdigit(lexer->lookahead)) {
|
|
|
|
advance(lexer);
|
|
|
|
while (iswdigit(lexer->lookahead)) advance(lexer);
|
|
|
|
if (lexer->lookahead == '>' || lexer->lookahead == '<') {
|
|
|
|
lexer->result_symbol = FILE_DESCRIPTOR;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2017-07-14 20:54:05 +00:00
|
|
|
}
|
|
|
|
|
2017-07-15 00:41:14 +00:00
|
|
|
if (valid_symbols[EMPTY_VALUE]) {
|
|
|
|
if (iswspace(lexer->lookahead)) {
|
|
|
|
lexer->result_symbol = EMPTY_VALUE;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-16 06:12:22 +00:00
|
|
|
if (valid_symbols[LENGTH_OPERATOR]) {
|
|
|
|
if (lexer->lookahead == '#') {
|
|
|
|
advance(lexer);
|
|
|
|
if (iswalpha(lexer->lookahead)) {
|
|
|
|
lexer->result_symbol = LENGTH_OPERATOR;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-15 00:14:23 +00:00
|
|
|
return false;
|
2017-07-14 20:54:05 +00:00
|
|
|
}
|
2017-07-14 21:27:13 +00:00
|
|
|
|
2017-07-16 05:13:55 +00:00
|
|
|
wstring heredoc_delimiter;
|
2017-07-14 21:27:13 +00:00
|
|
|
wstring current_leading_word;
|
2017-07-14 20:54:05 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}

// C-linkage entry points for the Bash external scanner.

extern "C" {
|
|
|
|
|
|
|
|
void *tree_sitter_bash_external_scanner_create() {
|
|
|
|
return new Scanner();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer,
|
|
|
|
const bool *valid_symbols) {
|
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
return scanner->scan(lexer, valid_symbols);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tree_sitter_bash_external_scanner_reset(void *payload) {
|
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
scanner->reset();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool tree_sitter_bash_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
return scanner->serialize(state);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tree_sitter_bash_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
scanner->deserialize(state);
|
|
|
|
}
|
|
|
|
|
|
|
|
void tree_sitter_bash_external_scanner_destroy(void *payload) {
|
|
|
|
Scanner *scanner = static_cast<Scanner *>(payload);
|
|
|
|
delete scanner;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|