2021-09-25 00:23:37 +00:00
|
|
|
#include <tree_sitter/parser.h>
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// See references in grammar.externals
|
2021-09-25 00:23:37 +00:00
|
|
|
// Kinds of external tokens this scanner can produce. The enumerators are
// used as indices into the `valid_symbols` array handed to the scanner, so
// their order (and therefore their numeric values) must not change.
enum TokenType {
  // Quoted content where interpolation is recognised (0-9)
  QUOTED_CONTENT_I_SINGLE,
  QUOTED_CONTENT_I_DOUBLE,
  QUOTED_CONTENT_I_HEREDOC_SINGLE,
  QUOTED_CONTENT_I_HEREDOC_DOUBLE,
  QUOTED_CONTENT_I_PARENTHESIS,
  QUOTED_CONTENT_I_CURLY,
  QUOTED_CONTENT_I_SQUARE,
  QUOTED_CONTENT_I_ANGLE,
  QUOTED_CONTENT_I_BAR,
  QUOTED_CONTENT_I_SLASH,

  // Quoted content without interpolation (10-19)
  QUOTED_CONTENT_SINGLE,
  QUOTED_CONTENT_DOUBLE,
  QUOTED_CONTENT_HEREDOC_SINGLE,
  QUOTED_CONTENT_HEREDOC_DOUBLE,
  QUOTED_CONTENT_PARENTHESIS,
  QUOTED_CONTENT_CURLY,
  QUOTED_CONTENT_SQUARE,
  QUOTED_CONTENT_ANGLE,
  QUOTED_CONTENT_BAR,
  QUOTED_CONTENT_SLASH,

  // Newlines classified by what follows them (20-22)
  NEWLINE_BEFORE_DO,
  NEWLINE_BEFORE_BINARY_OPERATOR,
  NEWLINE_BEFORE_COMMENT,

  // Zero-width marker emitted in front of a unary + or - (23)
  BEFORE_UNARY_OPERATOR,

  // The two-word `not in` operator (24)
  NOT_IN,

  // The colon that opens a quoted atom such as :"foo" (25)
  QUOTED_ATOM_START
};
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Moves the lexer one character forward, passing `false` as the lexer's
// "skip" flag so the character is not treated as skipped.
void advance(TSLexer* lexer) {
  const bool skip_character = false;
  lexer->advance(lexer, skip_character);
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Moves the lexer one character forward, passing `true` as the lexer's
// "skip" flag so the character is treated as skipped.
void skip(TSLexer *lexer) {
  const bool skip_character = true;
  lexer->advance(lexer, skip_character);
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Note: some checks require several lexer steps of lookahead
|
|
|
|
// and alter its state, for these we use names check_*
|
2021-09-25 00:23:37 +00:00
|
|
|
|
|
|
|
// Returns true for a space, tab, line feed or carriage return.
bool is_whitespace(int32_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
// Returns true for whitespace that stays on the current line: space or tab.
bool is_inline_whitespace(int32_t c) {
  if (c == '\t') {
    return true;
  }
  return c == ' ';
}
|
|
|
|
|
|
|
|
// Returns true when `c` is a line-break character: a line feed or a
// carriage return.
//
// Carriage return is accepted so that "\r\n" (and lone "\r") line endings
// are recognised as newlines too. A "\r\n" pair counts as two breaks in a
// row, which is harmless here: scan_newline consumes all whitespace that
// follows the first break character anyway.
bool is_newline(int32_t c) {
  return c == '\n' || c == '\r';
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Returns true when `c` is an ASCII decimal digit ('0' through '9').
bool is_digit(int32_t c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Returns true when the lexer sits on a ':' that is immediately followed by
// whitespace, i.e. the preceding text ends a keyword such as `foo: `.
//
// This is a check_* function: when the lookahead is ':' it is consumed as a
// side effect.
bool check_keyword_end(TSLexer* lexer) {
  if (lexer->lookahead != ':') {
    return false;
  }

  advance(lexer);
  return is_whitespace(lexer->lookahead);
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Decides whether the operator text that was just consumed is really used as
// an operator. Returns false in two situations:
//
//   * it is a keyword key (the text is followed by ':' plus whitespace)
//   * it is an operator identifier with arity (followed by '/' and a digit,
//     optionally separated by whitespace)
//
// This is a check_* function: it advances the lexer while looking ahead.
bool check_operator_end(TSLexer* lexer) {
  // Keyword
  if (lexer->lookahead == ':') {
    const bool is_keyword = check_keyword_end(lexer);
    return !is_keyword;
  }

  while (is_inline_whitespace(lexer->lookahead)) {
    advance(lexer);
  }

  // Anything other than '/' cannot start an arity suffix
  if (lexer->lookahead != '/') {
    return true;
  }

  // Operator identifier with arity
  advance(lexer);
  while (is_whitespace(lexer->lookahead)) {
    advance(lexer);
  }
  return !is_digit(lexer->lookahead);
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Characters that are treated as ending a word token (see is_token_end).
// Each character is listed exactly once.
const char token_terminators[] = {
  // Operator starts
  '@', '.', '+', '-', '^', '*', '/', '<', '>', '|', '~', '=', '&', '\\', '%',
  // Delimiters
  '{', '}', '[', ']', '(', ')', '"', '\'',
  // Separators
  ',', ';',
  // Comment
  '#'
};
|
|
|
|
|
|
|
|
// Note: this is a heuristic as we only use this to distinguish word
|
2021-09-28 14:00:35 +00:00
|
|
|
// operators and we don't want to include complex Unicode ranges
|
2021-09-25 00:23:37 +00:00
|
|
|
bool is_token_end(int32_t c) {
|
2021-09-28 14:00:35 +00:00
|
|
|
for (const char& terminator : token_terminators) {
|
|
|
|
if (c == terminator) {
|
2021-09-25 00:23:37 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return is_whitespace(c);
|
|
|
|
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
struct QuotedContentInfo {
|
|
|
|
const TokenType token_type;
|
|
|
|
const bool supports_interpol;
|
|
|
|
const int32_t end_delimiter;
|
|
|
|
const uint8_t delimiter_length;
|
|
|
|
};
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// One entry per QUOTED_CONTENT_* token:
// { token type, supports interpolation, end delimiter, delimiter length }
const QuotedContentInfo quoted_content_infos[] = {
  // Interpolating variants
  { QUOTED_CONTENT_I_SINGLE,         true,  '\'', 1 },
  { QUOTED_CONTENT_I_DOUBLE,         true,  '"',  1 },
  { QUOTED_CONTENT_I_HEREDOC_SINGLE, true,  '\'', 3 },
  { QUOTED_CONTENT_I_HEREDOC_DOUBLE, true,  '"',  3 },
  { QUOTED_CONTENT_I_PARENTHESIS,    true,  ')',  1 },
  { QUOTED_CONTENT_I_CURLY,          true,  '}',  1 },
  { QUOTED_CONTENT_I_SQUARE,         true,  ']',  1 },
  { QUOTED_CONTENT_I_ANGLE,          true,  '>',  1 },
  { QUOTED_CONTENT_I_BAR,            true,  '|',  1 },
  { QUOTED_CONTENT_I_SLASH,          true,  '/',  1 },

  // Non-interpolating variants
  { QUOTED_CONTENT_SINGLE,           false, '\'', 1 },
  { QUOTED_CONTENT_DOUBLE,           false, '"',  1 },
  { QUOTED_CONTENT_HEREDOC_SINGLE,   false, '\'', 3 },
  { QUOTED_CONTENT_HEREDOC_DOUBLE,   false, '"',  3 },
  { QUOTED_CONTENT_PARENTHESIS,      false, ')',  1 },
  { QUOTED_CONTENT_CURLY,            false, '}',  1 },
  { QUOTED_CONTENT_SQUARE,           false, ']',  1 },
  { QUOTED_CONTENT_ANGLE,            false, '>',  1 },
  { QUOTED_CONTENT_BAR,              false, '|',  1 },
  { QUOTED_CONTENT_SLASH,            false, '/',  1 },
};
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Number of entries in quoted_content_infos
const uint8_t quoted_content_infos_length =
  sizeof(quoted_content_infos) / sizeof(quoted_content_infos[0]);
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
int8_t find_quoted_token_info(const bool* valid_symbols) {
|
|
|
|
// Quoted tokens are mutually exclusive and only one should be valid
|
|
|
|
// at a time. If multiple are valid it means we parse an arbitrary
|
|
|
|
// code outside quotes, in which case we don't want to tokenize it as
|
|
|
|
// quoted content.
|
|
|
|
if (valid_symbols[QUOTED_CONTENT_I_SINGLE] && valid_symbols[QUOTED_CONTENT_I_DOUBLE]) {
|
|
|
|
return -1;
|
|
|
|
}
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
for (uint8_t i = 0; i < quoted_content_infos_length; i++) {
|
|
|
|
if (valid_symbols[quoted_content_infos[i].token_type]) {
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
}
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Scans a run of quoted content described by `info`.
//
// The token end is re-marked before every step, so the resulting token stops
// right before whatever ended the run: the closing delimiter, an
// interpolation start ("#{"), or an escape. Returns true only when at least
// one step was taken before that point; returns false when a NUL lookahead is
// reached.
bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
  lexer->result_symbol = info.token_type;

  bool has_content = false;

  for (;;) {
    lexer->mark_end(lexer);
    const int32_t current = lexer->lookahead;

    if (current == info.end_delimiter) {
      // Count how many delimiter characters follow each other, up to the
      // number required to close this kind of content.
      uint8_t matched = 1;
      while (matched < info.delimiter_length) {
        advance(lexer);
        if (lexer->lookahead != info.end_delimiter) {
          break;
        }
        matched++;
      }

      if (matched == info.delimiter_length) {
        return has_content;
      }
      // Too few delimiter characters: they are part of the content
    } else if (current == '#') {
      advance(lexer);
      // "#{" starts an interpolation, a lone '#' is ordinary content
      if (info.supports_interpol && lexer->lookahead == '{') {
        return has_content;
      }
    } else if (current == '\\') {
      // With interpolation support every backslash ends the content
      if (info.supports_interpol) {
        return has_content;
      }
      // Otherwise only a backslash in front of the end delimiter does
      advance(lexer);
      if (lexer->lookahead == info.end_delimiter) {
        return has_content;
      }
    } else if (current == '\0') {
      return false;
    } else {
      advance(lexer);
    }

    has_content = true;
  }
}
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
bool scan_newline(TSLexer* lexer, const bool* valid_symbols) {
|
|
|
|
advance(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
while (is_whitespace(lexer->lookahead)) {
|
|
|
|
advance(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// Note we include all the whitespace after newline, so that the
|
|
|
|
// parser doesn't have to go through it again
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
|
|
|
|
if (lexer->lookahead == '#') {
|
|
|
|
lexer->result_symbol = NEWLINE_BEFORE_COMMENT;
|
|
|
|
return true;
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == 'd' && valid_symbols[NEWLINE_BEFORE_DO]) {
|
|
|
|
lexer->result_symbol = NEWLINE_BEFORE_DO;
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'o') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return is_token_end(lexer->lookahead);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
if (valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR] ) {
|
|
|
|
lexer->result_symbol = NEWLINE_BEFORE_BINARY_OPERATOR;
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// &&, &&&
|
|
|
|
if (lexer->lookahead == '&') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '&') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '&') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
// =, ==, ===, =~, =>
|
|
|
|
} else if (lexer->lookahead == '=') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '=') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '=') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
} else {
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
} else if (lexer->lookahead == '~') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else if (lexer->lookahead == '>') {
|
|
|
|
advance(lexer);
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// ::
|
|
|
|
} else if (lexer->lookahead == ':') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == ':') {
|
|
|
|
advance(lexer);
|
|
|
|
// Ignore ::: atom
|
|
|
|
if (lexer->lookahead == ':') return false;
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// ++, +++
|
|
|
|
} else if (lexer->lookahead == '+') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '+') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '+') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
// --, ---, ->
|
|
|
|
} else if (lexer->lookahead == '-') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '-') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '-') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
} else {
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
|
|
|
} else if (lexer->lookahead == '>') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// <, <=, <-, <>, <~, <~>, <|>, <<<, <<~
|
|
|
|
} else if (lexer->lookahead == '<') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '=' ||
|
|
|
|
lexer->lookahead == '-' ||
|
|
|
|
lexer->lookahead == '>') {
|
|
|
|
advance(lexer);
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else if (lexer->lookahead == '~') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '>') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
} else {
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
} else if (lexer->lookahead == '|') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == '>') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
} else if (lexer->lookahead == '<') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == '<' ||
|
|
|
|
lexer->lookahead == '~') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// >, >=, >>>
|
|
|
|
} else if (lexer->lookahead == '>') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '=') {
|
|
|
|
advance(lexer);
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else if (lexer->lookahead == '>') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '>') {
|
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// ^^^
|
|
|
|
} else if (lexer->lookahead == '^') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '^') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == '^') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
// !=, !==
|
|
|
|
} else if (lexer->lookahead == '!') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '=') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == '=') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
} else {
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
// ~>, ~>>
|
|
|
|
} else if (lexer->lookahead == '~') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '>') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == '>') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
} else {
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
// |, ||, |||, |>
|
|
|
|
} else if (lexer->lookahead == '|') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '|') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == '|') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
} else {
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
} else if (lexer->lookahead == '>') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// *, **
|
|
|
|
} else if (lexer->lookahead == '*') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '*') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// / //
|
|
|
|
} else if (lexer->lookahead == '/') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '/') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// ., ..
|
|
|
|
} else if (lexer->lookahead == '.') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '.') {
|
|
|
|
advance(lexer);
|
|
|
|
// Ignore ... identifier
|
|
|
|
if (lexer->lookahead == '.') return false;
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
} else {
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// double slash
|
|
|
|
} else if (lexer->lookahead == '\\') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '\\') {
|
|
|
|
advance(lexer);
|
|
|
|
return check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
} else if (lexer->lookahead == 'w') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'h') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'e') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == 'n') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return is_token_end(lexer->lookahead) && check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
} else if (lexer->lookahead == 'a') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'n') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
if (lexer->lookahead == 'd') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return is_token_end(lexer->lookahead) && check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
// or
|
|
|
|
} else if (lexer->lookahead == 'o') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'r') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return is_token_end(lexer->lookahead) && check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// in
|
|
|
|
} else if (lexer->lookahead == 'i') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'n') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return is_token_end(lexer->lookahead) && check_operator_end(lexer);
|
|
|
|
}
|
|
|
|
// not in
|
|
|
|
} else if (lexer->lookahead == 'n') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'o') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 't') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
while (is_inline_whitespace(lexer->lookahead)) {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
|
|
|
if (lexer->lookahead == 'i') {
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == 'n') {
|
2021-09-25 00:23:37 +00:00
|
|
|
advance(lexer);
|
2021-09-28 14:00:35 +00:00
|
|
|
return is_token_end(lexer->lookahead) && check_operator_end(lexer);
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-09-28 14:00:35 +00:00
|
|
|
}
|
2021-09-25 00:23:37 +00:00
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Consumes the characters of the NUL-terminated `text` one by one. Returns
// false at the first mismatch (characters matched so far stay consumed).
bool consume_literal(TSLexer* lexer, const char* text) {
  for (; *text != '\0'; text++) {
    if (lexer->lookahead != *text) {
      return false;
    }
    advance(lexer);
  }
  return true;
}

// Handles a '+' or '-' lookahead: produces the zero-width
// BEFORE_UNARY_OPERATOR token when the sign is directly followed by its
// operand. The token end is marked before the sign, so the sign itself is
// only looked at, never included.
//
// Returns false when the sign is followed by whitespace or starts something
// else: ++, +:, +/ for plus and --, ->, -:, -/ for minus.
bool scan_before_unary_operator(TSLexer* lexer) {
  const int32_t sign = lexer->lookahead;

  lexer->mark_end(lexer);
  lexer->result_symbol = BEFORE_UNARY_OPERATOR;
  advance(lexer);

  const int32_t next = lexer->lookahead;

  if (next == sign || next == ':' || next == '/') {
    return false;
  }
  if (sign == '-' && next == '>') {
    return false;
  }
  return !is_whitespace(next);
}

// Scans the `not in` operator as a single NOT_IN token: `not`, at least one
// space or tab, then `in` ending at a token boundary. Whitespace is required
// so that an identifier spelled `notin` is not mistaken for the operator.
bool scan_not_in(TSLexer* lexer) {
  lexer->result_symbol = NOT_IN;

  if (!consume_literal(lexer, "not")) {
    return false;
  }
  if (!is_inline_whitespace(lexer->lookahead)) {
    return false;
  }
  while (is_inline_whitespace(lexer->lookahead)) {
    advance(lexer);
  }
  return consume_literal(lexer, "in") && is_token_end(lexer->lookahead);
}

// Scans the ':' of a quoted atom. The token covers only the colon and is
// produced only when a double or single quote follows it.
bool scan_quoted_atom_start(TSLexer* lexer) {
  advance(lexer);
  lexer->mark_end(lexer);
  lexer->result_symbol = QUOTED_ATOM_START;
  return lexer->lookahead == '"' || lexer->lookahead == '\'';
}

// Scanner entry point: tries to produce one of the external tokens that
// `valid_symbols` marks as acceptable at the current position. Returns true
// when a token was recognised (its kind is stored in lexer->result_symbol).
bool scan(TSLexer* lexer, const bool* valid_symbols) {
  const int8_t quoted_content_info_idx = find_quoted_token_info(valid_symbols);

  // Quoted content, which matches any character except for close
  // delimiters, escapes and interpolations
  if (quoted_content_info_idx != -1) {
    return scan_quoted_content(lexer, quoted_content_infos[quoted_content_info_idx]);
  }

  // Leading spaces and tabs are skipped. Whether any were skipped matters
  // for telling a unary sign from a binary one.
  bool skipped_whitespace = false;

  while (is_inline_whitespace(lexer->lookahead)) {
    skipped_whitespace = true;
    skip(lexer);
  }

  // Newline, which is either tokenized as a special newline or ignored
  const bool newline_token_expected =
    valid_symbols[NEWLINE_BEFORE_DO] ||
    valid_symbols[NEWLINE_BEFORE_BINARY_OPERATOR] ||
    valid_symbols[NEWLINE_BEFORE_COMMENT];

  if (is_newline(lexer->lookahead) && newline_token_expected) {
    return scan_newline(lexer, valid_symbols);
  }

  switch (lexer->lookahead) {
    // before unary + or -
    case '+':
    case '-':
      return skipped_whitespace &&
             valid_symbols[BEFORE_UNARY_OPERATOR] &&
             scan_before_unary_operator(lexer);

    // not in
    case 'n':
      return valid_symbols[NOT_IN] && scan_not_in(lexer);

    // quoted atom start
    case ':':
      return valid_symbols[QUOTED_ATOM_START] && scan_quoted_atom_start(lexer);

    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
// Expose the API expected by tree-sitter
|
|
|
|
|
|
|
|
extern "C" {
|
|
|
|
void* tree_sitter_elixir_external_scanner_create() {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool tree_sitter_elixir_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols) {
|
|
|
|
return scan(lexer, valid_symbols);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned tree_sitter_elixir_external_scanner_serialize(void* payload, char* buffer) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void tree_sitter_elixir_external_scanner_deserialize(void* payload, const char* buffer, unsigned length) {}
|
|
|
|
|
|
|
|
void tree_sitter_elixir_external_scanner_destroy(void* payload) {}
|
|
|
|
}
|
|
|
|
|
2021-09-28 14:00:35 +00:00
|
|
|
// end anonymous namespace
|
2021-09-25 00:23:37 +00:00
|
|
|
}
|