tree-sitter-bash/src/scanner.cc

171 lines
4.0 KiB
C++
Raw Normal View History

2017-07-14 20:54:05 +00:00
#include <tree_sitter/parser.h>
#include <string>
#include <cwctype>
namespace {
using std::wstring;
// Tokens this external scanner can produce. The enumerators are used both as
// indices into the `valid_symbols` array passed to the scanner and as the
// value stored in `lexer->result_symbol`, so their order must not change.
// NOTE(review): presumably this order has to mirror the `externals` list of
// the grammar definition -- confirm against the grammar before reordering.
enum TokenType {
  // A whole heredoc body, terminated by its delimiter (or end of input)
  // without a `$` being encountered first.
  SIMPLE_HEREDOC,
  // First chunk of a heredoc body, ending right before a `$`.
  HEREDOC_BEGINNING,
  // A later chunk of a heredoc body that again ends right before a `$`.
  HEREDOC_MIDDLE,
  // Final chunk of a heredoc body that was previously interrupted by a `$`.
  HEREDOC_END,
  // A run of digits immediately followed by `<` or `>`.
  FILE_DESCRIPTOR,
  // Zero-width token emitted when the next character is whitespace.
  EMPTY_VALUE,
  // A `#` that is immediately followed by an alphabetic character.
  LENGTH_OPERATOR
};
struct Scanner {
void skip(TSLexer *lexer) {
lexer->advance(lexer, true);
}
void advance(TSLexer *lexer) {
lexer->advance(lexer, false);
}
void reset() {}
bool serialize(TSExternalTokenState state) { return true; }
void deserialize(TSExternalTokenState state) {}
bool scan_heredoc_end_identifier(TSLexer *lexer) {
current_leading_word.clear();
2017-07-14 20:54:05 +00:00
while (iswalpha(lexer->lookahead)) {
current_leading_word += lexer->lookahead;
2017-07-14 20:54:05 +00:00
advance(lexer);
}
2017-07-16 05:13:55 +00:00
return current_leading_word == heredoc_delimiter;
}
2017-07-14 20:54:05 +00:00
bool scan_heredoc_content(TSLexer *lexer, TokenType middle_type, TokenType end_type) {
bool did_advance = false;
2017-07-14 20:54:05 +00:00
for (;;) {
switch (lexer->lookahead) {
case '\0': {
lexer->result_symbol = end_type;
return true;
}
case '$': {
lexer->result_symbol = middle_type;
return did_advance;
}
case '\n': {
did_advance = true;
advance(lexer);
if (scan_heredoc_end_identifier(lexer)) {
lexer->result_symbol = end_type;
return true;
}
break;
}
default: {
did_advance = true;
advance(lexer);
break;
}
}
}
}
bool scan(TSLexer *lexer, const bool *valid_symbols) {
2017-07-16 05:13:55 +00:00
if (valid_symbols[HEREDOC_MIDDLE] && !heredoc_delimiter.empty()) {
return scan_heredoc_content(lexer, HEREDOC_MIDDLE, HEREDOC_END);
2017-07-15 00:41:14 +00:00
}
if (valid_symbols[HEREDOC_BEGINNING]) {
2017-07-16 05:13:55 +00:00
heredoc_delimiter.clear();
while (iswalpha(lexer->lookahead)) {
2017-07-16 05:13:55 +00:00
heredoc_delimiter += lexer->lookahead;
advance(lexer);
}
if (lexer->lookahead != '\n') return false;
2017-07-14 20:54:05 +00:00
advance(lexer);
if (scan_heredoc_end_identifier(lexer)) {
lexer->result_symbol = SIMPLE_HEREDOC;
return true;
}
2017-07-14 20:54:05 +00:00
return scan_heredoc_content(lexer, HEREDOC_BEGINNING, SIMPLE_HEREDOC);
2017-07-15 00:41:14 +00:00
}
if (valid_symbols[FILE_DESCRIPTOR]) {
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') skip(lexer);
if (iswdigit(lexer->lookahead)) {
advance(lexer);
while (iswdigit(lexer->lookahead)) advance(lexer);
if (lexer->lookahead == '>' || lexer->lookahead == '<') {
lexer->result_symbol = FILE_DESCRIPTOR;
return true;
}
}
2017-07-14 20:54:05 +00:00
}
2017-07-15 00:41:14 +00:00
if (valid_symbols[EMPTY_VALUE]) {
if (iswspace(lexer->lookahead)) {
lexer->result_symbol = EMPTY_VALUE;
return true;
}
}
if (valid_symbols[LENGTH_OPERATOR]) {
if (lexer->lookahead == '#') {
advance(lexer);
if (iswalpha(lexer->lookahead)) {
lexer->result_symbol = LENGTH_OPERATOR;
return true;
}
}
}
return false;
2017-07-14 20:54:05 +00:00
}
2017-07-16 05:13:55 +00:00
wstring heredoc_delimiter;
wstring current_leading_word;
2017-07-14 20:54:05 +00:00
};
}
extern "C" {
void *tree_sitter_bash_external_scanner_create() {
return new Scanner();
}
bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
void tree_sitter_bash_external_scanner_reset(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->reset();
}
bool tree_sitter_bash_external_scanner_serialize(void *payload, TSExternalTokenState state) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(state);
}
void tree_sitter_bash_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(state);
}
// Deletes the Scanner that `..._create` allocated and that `payload` points to.
void tree_sitter_bash_external_scanner_destroy(void *payload) {
  delete static_cast<Scanner *>(payload);
}
}