Initial parser implementation

This commit is contained in:
Jonatan Kłosko 2021-09-25 02:23:37 +02:00
parent 3f19b87c70
commit dad92d2b87
31 changed files with 415172 additions and 337 deletions

7
.gitignore vendored
View File

@ -1,3 +1,8 @@
# Deps
/node_modules/ /node_modules/
/build/
# Temporary files
/tmp/ /tmp/
# Temporary files generated by Tree-sitter
log.html

View File

@ -9,7 +9,7 @@
"sources": [ "sources": [
"bindings/node/binding.cc", "bindings/node/binding.cc",
"src/parser.c", "src/parser.c",
# If your language uses an external scanner, add it here. "src/scanner.cc"
], ],
"cflags_c": [ "cflags_c": [
"-std=c99", "-std=c99",

View File

@ -1,7 +1,921 @@
// Operator precedence:
// * https://hexdocs.pm/elixir/master/operators.html
// * https://github.com/elixir-lang/elixir/blob/master/lib/elixir/src/elixir_parser.yrl
//
// Numeric precedence levels passed to prec/prec.left/prec.right in the
// rules below. Entries are listed from the lowest value to the highest;
// tree-sitter resolves conflicts in favour of the rule with the higher
// value, so later entries bind tighter than earlier ones.
const PREC = {
IN_MATCH_OPS: 10,
WHEN_OP: 20,
TYPE_OP: 30,
BAR_OP: 40,
ASSOC_OP: 50,
CAPTURE_OP: 60,
MATCH_OP: 70,
OR_OPS: 80,
AND_OPS: 90,
COMP_OPS: 100,
REL_OPS: 110,
ARROW_OPS: 120,
IN_OPS: 130,
XOR_OP: 140,
TERNARY_OP: 150,
CONCAT_OPS: 160,
ADD_OPS: 170,
MULT_OPS: 180,
POWER_OP: 190,
UNARY_OPS: 200,
// Used by the access_call rule (`expression[key]`)
ACCESS: 205,
DOT_OP: 210,
AT_OP: 220,
// Used for a capture operand such as `&1`, see the unary_operator rule
CAPTURE_OPERAND: 235,
};
// Operator groups, each sharing a single precedence level (see PREC)
const IN_MATCH_OPS = ["<-", "\\\\"];
const OR_OPS = ["||", "|||", "or"];
const AND_OPS = ["&&", "&&&", "and"];
const COMP_OPS = ["==", "!=", "=~", "===", "!=="];
const REL_OPS = ["<", ">", "<=", ">="];
const ARROW_OPS = ["|>", "<<<", ">>>", "<<~", "~>>", "<~", "~>", "<~>", "<|>"];
const IN_OPS = ["in", "not in"];
const CONCAT_OPS = ["++", "--", "+++", "---", "..", "<>"];
const ADD_OPS = ["+", "-"];
const MULT_OPS = ["*", "/"];
const UNARY_OPS = ["+", "-", "!", "^", "~~~", "not"];

// Every operator literal: the standalone operators first, followed by
// the groups above in declaration order. Note that "+" and "-" appear
// twice, as they belong to both ADD_OPS and UNARY_OPS.
const ALL_OPS = [
  "->",
  "when",
  "::",
  "|",
  "=>",
  "&",
  "=",
  "^^^",
  "//",
  "**",
  ".",
  "@",
  ...IN_MATCH_OPS,
  ...OR_OPS,
  ...AND_OPS,
  ...COMP_OPS,
  ...REL_OPS,
  ...ARROW_OPS,
  ...IN_OPS,
  ...CONCAT_OPS,
  ...ADD_OPS,
  ...MULT_OPS,
  ...UNARY_OPS,
];

// Operators usable as atom literals: word operators are left out, and so
// is "=>", which is not a valid atom
const ATOM_OPERATOR_LITERALS = ALL_OPS.filter(
  (operator) => operator !== "=>" && !/[a-z]/.test(operator)
);

// Note that for keywords we use external scanner (KEYWORD_SPECIAL_LITERAL),
// so it should be kept in sync
const ATOM_SPECIAL_LITERALS = ["...", "%{}", "{}", "%", "<<>>", "..//"];

// Word tokens used directly in the grammar
const RESERVED_WORD_TOKENS = [
  // Operators
  "and",
  "in",
  "not",
  "or",
  "when",
  // Literals
  "true",
  "false",
  "nil",
  // Other
  "after",
  "catch",
  "do",
  "else",
  "end",
  "fn",
  "rescue",
];

const SPECIAL_IDENTIFIERS = [
  "__MODULE__",
  "__DIR__",
  "__ENV__",
  "__CALLER__",
  "__STACKTRACE__",
];
// Numbers
const DIGITS = /[0-9]+/;
const BIN_DIGITS = /[0-1]+/;
const OCT_DIGITS = /[0-7]+/;
const HEX_DIGITS = /[0-9a-fA-F]+/;

// Digit groups may be separated by single underscores, e.g. 1_000
const underscoredDigits = (digits) => sep1(digits, "_");

const numberDec = underscoredDigits(DIGITS);
const numberBin = seq("0b", underscoredDigits(BIN_DIGITS));
const numberOct = seq("0o", underscoredDigits(OCT_DIGITS));
const numberHex = seq("0x", underscoredDigits(HEX_DIGITS));
const integer = choice(numberDec, numberBin, numberOct, numberHex);

// Optional exponent of a float: "e" or "E", an optional sign and the exponent value
const exponentSign = optional(choice("-", "+"));
const floatScientificPart = seq(/[eE]/, exponentSign, integer);
const float = seq(numberDec, ".", numberDec, optional(floatScientificPart));

// A single segment of an alias, such as `Foo` in `Foo.Bar`
const aliasPart = /[A-Z][_a-zA-Z0-9]*/;
module.exports = grammar({ module.exports = grammar({
name: "elixir", name: "elixir",
// TODO describe stuff (also in the separate notes doc add clarification
// how we use this verbose tokens to avoid needing scanner state)
externals: ($) => [
$._quoted_content_i_single,
$._quoted_content_i_double,
$._quoted_content_i_heredoc_single,
$._quoted_content_i_heredoc_double,
$._quoted_content_i_parenthesis,
$._quoted_content_i_curly,
$._quoted_content_i_square,
$._quoted_content_i_angle,
$._quoted_content_i_bar,
$._quoted_content_i_slash,
$._quoted_content_single,
$._quoted_content_double,
$._quoted_content_heredoc_single,
$._quoted_content_heredoc_double,
$._quoted_content_parenthesis,
$._quoted_content_curly,
$._quoted_content_square,
$._quoted_content_angle,
$._quoted_content_bar,
$._quoted_content_slash,
$._keyword_special_literal,
$._atom_start,
$._keyword_end,
$._newline_before_do,
$._newline_before_binary_op,
// TODO explain this, basically we use newline ignored for newline before comment,
// as after the comment there is another newline that we then consider as usual (so
// that comments are skipped when considering newlines) <- this is chaotic need a better one
$._newline_before_comment,
// TODO explain this, basically we use this to force unary + and -
// if there is no spacing before the operand
$._before_unary_op,
$._not_in,
],
// TODO include in notes about why using extra for newline before binary op is fine
// TODO figure out how "\n" helps with the behaviour in
// [
// :a,
// ]
// and how it generally works with extras
extras: ($) => [
$.comment,
/\s|\\\n/,
$._newline_before_binary_op,
$._newline_before_comment,
"\n",
],
// TODO check if the parser doesn't compile without each conflict rule,
// otherwise it means we don't really use it (I think)
conflicts: ($) => [
// [$._newline_before_binary_op],
[$.binary_operator],
[$.keywords],
// [$.identifier, $.atom_literal],
[$._expression, $._local_call_with_arguments],
[
$._expression,
$._local_call_with_arguments,
$._local_call_without_arguments,
],
[$._remote_call, $._parenthesised_remote_call],
// stab clause `(x` may be either `(x;y) ->` or `(x, y) ->`
// [$.block, $._stab_clause_arguments],
[$.block, $._stab_clause_parentheses_arguments],
[$.block, $._stab_clause_arguments],
[$.block, $._stab_clause_arguments_expression],
// when in stab clause
[$.binary_operator, $._stab_clause_arguments_expression],
[$.tuple, $.map],
[$.tuple, $.map_content],
[$.operator_identifier, $.stab_clause],
[$.unary_operator, $.operator_identifier],
// [$.alias],
[$.body],
// [$.block, $._stab_clause_arguments],
// [$.block, $._stab_clause_parentheses_arguments],
// [$.block, $._stab_clause_parentheses_arguments],
[$.after_block],
[$.rescue_block],
[$.catch_block],
[$.else_block],
],
rules: { rules: {
source: ($) => "TODO", source: ($) =>
seq(
optional($._terminator),
optional(
seq(sep1($._expression, $._terminator), optional($._terminator))
)
),
_terminator: ($) =>
prec.right(choice(seq(repeat("\n"), ";"), repeat1("\n"))),
_expression: ($) =>
choice(
$.block,
$._identifier,
$.alias,
$.integer,
$.float,
$.atom,
$.string,
$.charlist,
$.sigil,
$.list,
$.tuple,
$.bitstring,
$.map,
$.char,
$.boolean,
$.nil,
$.unary_operator,
$.binary_operator,
$.dot,
$.call,
$.access_call,
$.anonymous_function
),
block: ($) =>
prec(
PREC.WHEN_OP,
seq(
"(",
seq(
optional($._terminator),
optional(
seq(
sep1(choice($._expression, $.stab_clause), $._terminator),
optional($._terminator)
)
)
),
")"
)
),
_identifier: ($) =>
choice($.identifier, $.unused_identifier, $.special_identifier),
// Note: Elixir does not allow uppercase and titlecase letters
// as a variable starting character, but this regex would match
// those. This implies we would happily parse those cases, but
// since they are not valid Elixir it's unlikely to stumble upon
// them. TODO reword
// Ref: https://hexdocs.pm/elixir/master/unicode-syntax.html#variables
// TODO see if we need this in custom scanner in the end, if we do,
// then we may use the generation script from the original repo instead
// and make this an external (though I'd check if these custom unicode
// functions are efficient, does compiler optimise such checks?)
// identifier: ($) => choice(/[\p{ID_Start}][\p{ID_Continue}]*[?!]?/u, "..."),
// identifier: ($) => choice(/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}\p{Other_ID_Start}][\p{ID_Continue}]*[?!]?/u, "..."),
// identifier: ($) => choice(/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}][\p{ID_Continue}]*[?!]?/u, "..."),
//
// TODO elaborate, but basically
//
// we remove uppercase/titlecase letters from ID_Start as Elixir does
// we remove the subtractions (we cannot express group subtraction in regex),
// but it's fine because at the time of writing these groups only really subtract
// a single character
// Unicode.Set.to_utf8_char "[[[:L:][:Nl:][:Other_ID_Start:]] & [[:Pattern_Syntax:][:Pattern_White_Space:]]]"
// we use hardcoded codepoints for \p{Other_ID_Start} since treesitter/js regexp doesn't
// recognise this group
//
// Other_ID_Start \u1885\u1886\u2118\u212E\u309B\u309C
// (this the list at the time of writing, it's for backward compatibility, see https://unicode.org/reports/tr31/#Backward_Compatibility)
identifier: ($) =>
choice(
/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}\u1885\u1886\u2118\u212E\u309B\u309C][\p{ID_Continue}]*[?!]?/u,
"..."
),
unused_identifier: ($) => /_[\p{ID_Continue}]*[?!]?/u,
special_identifier: ($) => choice(...SPECIAL_IDENTIFIERS),
// We have a separate rule for single-part alias, so that we
// can use it in the keywords rule
alias: ($) => choice($._alias_single, $._alias_multi),
_alias_single: ($) => aliasPart,
_alias_multi: ($) => token(sep1(aliasPart, /\s*\.\s*/)),
integer: ($) => token(integer),
float: ($) => token(float),
atom: ($) =>
seq(
$._atom_start,
choice(
alias($._atom_word_literal, $.atom_literal),
alias($._atom_operator_literal, $.atom_literal),
alias($._atom_special_literal, $.atom_literal),
$._quoted_i_double,
$._quoted_i_single
)
),
// TODO comment on the unicode groups here
_atom_word_literal: ($) => /[\p{ID_Start}_][\p{ID_Continue}@]*[?!]?/u,
_atom_operator_literal: ($) => choice(...ATOM_OPERATOR_LITERALS),
_atom_special_literal: ($) => choice(...ATOM_SPECIAL_LITERALS),
// Defines $._quoted_content_i_{name} and $._quoted_content_{name} rules,
// content with and without interpolation respectively
...defineQuoted(`"`, `"`, "double"),
...defineQuoted(`'`, `'`, "single"),
...defineQuoted(`'''`, `'''`, "heredoc_single"),
...defineQuoted(`"""`, `"""`, "heredoc_double"),
...defineQuoted(`(`, `)`, "parenthesis"),
...defineQuoted(`{`, `}`, "curly"),
...defineQuoted(`[`, `]`, "square"),
...defineQuoted(`<`, `>`, "angle"),
...defineQuoted(`|`, `|`, "bar"),
...defineQuoted(`/`, `/`, "slash"),
string: ($) => choice($._quoted_i_double, $._quoted_i_heredoc_double),
charlist: ($) => choice($._quoted_i_single, $._quoted_i_heredoc_single),
interpolation: ($) => seq("#{", $._expression, "}"),
escape_sequence: ($) =>
token(
seq(
"\\",
choice(
// Single escaped character
/[^ux]/,
// Hex byte
/x[0-9a-fA-F]{1,2}/,
/x{[0-9a-fA-F]+}/,
// Unicode code point
/u{[0-9a-fA-F]+}/,
/u[0-9a-fA-F]{4}/
)
)
),
sigil: ($) =>
seq(
"~",
choice(
seq(
alias(token.immediate(/[a-z]/), $.sigil_name),
choice(
$._quoted_i_double,
$._quoted_i_single,
$._quoted_i_heredoc_single,
$._quoted_i_heredoc_double,
$._quoted_i_parenthesis,
$._quoted_i_curly,
$._quoted_i_square,
$._quoted_i_angle,
$._quoted_i_bar,
$._quoted_i_slash
)
),
seq(
alias(token.immediate(/[A-Z]/), $.sigil_name),
choice(
$._quoted_double,
$._quoted_single,
$._quoted_heredoc_single,
$._quoted_heredoc_double,
$._quoted_parenthesis,
$._quoted_curly,
$._quoted_square,
$._quoted_angle,
$._quoted_bar,
$._quoted_slash
)
)
),
optional(alias(token.immediate(/[a-zA-Z]+/), $.sigil_modifiers))
),
unary_operator: ($) =>
choice(
unaryOp($, prec, PREC.CAPTURE_OP, "&", $._capture_expression),
unaryOp($, prec, PREC.UNARY_OPS, choice(...UNARY_OPS)),
unaryOp($, prec, PREC.AT_OP, "@"),
// Capture operand like &1 is a special case with higher precedence
unaryOp($, prec, PREC.CAPTURE_OPERAND, "&", $.integer)
),
// Operand of the capture operator `&` (see unary_operator): either an
// expression wrapped in parentheses or any other expression
_capture_expression: ($) =>
choice(
// TODO should parenthesised expression be generally used (?)
// The parenthesised form uses a precedence one above the block rule
// (PREC.WHEN_OP), so it takes precedence over a block expression
prec(PREC.WHEN_OP + 1, seq("(", $._expression, ")")),
$._expression
),
binary_operator: ($) =>
choice(
binaryOp($, prec.left, PREC.IN_MATCH_OPS, choice(...IN_MATCH_OPS)),
binaryOp(
$,
prec.right,
PREC.WHEN_OP,
"when",
$._expression,
choice($._expression, $.keywords)
),
binaryOp($, prec.right, PREC.TYPE_OP, "::"),
binaryOp(
$,
prec.right,
PREC.BAR_OP,
"|",
$._expression,
choice($._expression, $.keywords)
),
binaryOp($, prec.right, PREC.ASSOC_OP, "=>"),
binaryOp($, prec.right, PREC.MATCH_OP, "="),
binaryOp($, prec.left, PREC.OR_OPS, choice(...OR_OPS)),
binaryOp($, prec.left, PREC.AND_OPS, choice(...AND_OPS)),
binaryOp($, prec.left, PREC.COMP_OPS, choice(...COMP_OPS)),
binaryOp($, prec.left, PREC.REL_OPS, choice(...REL_OPS)),
binaryOp($, prec.left, PREC.ARROW_OPS, choice(...ARROW_OPS)),
binaryOp($, prec.left, PREC.IN_OPS, choice("in", $._not_in)),
binaryOp($, prec.left, PREC.XOR_OP, "^^^"),
binaryOp($, prec.right, PREC.TERNARY_OP, "//"),
binaryOp($, prec.right, PREC.CONCAT_OPS, choice(...CONCAT_OPS)),
binaryOp($, prec.left, PREC.ADD_OPS, choice(...ADD_OPS)),
binaryOp($, prec.left, PREC.MULT_OPS, choice(...MULT_OPS)),
binaryOp($, prec.left, PREC.POWER_OP, "**"),
// Operator with arity
binaryOp(
$,
prec.left,
PREC.MULT_OPS,
"/",
$.operator_identifier,
$.integer
)
),
operator_identifier: ($) =>
// Operators with the following changes:
// * exclude "=>" since it's not a valid atom/operator identifier anyway (valid only in map)
// * we exclude // since it's only valid after ..
// * we remove "-" and "+" since they are both unary and binary
// We use the same precedence as unary operators, so that a sequence
// like `& /` is a conflict and is resolved via $.conflicts
// (could be be either `& / 2` or `& / / 2`)
choice(
// Unary operators
prec(PREC.CAPTURE_OP, "&"),
prec(PREC.UNARY_OPS, choice(...UNARY_OPS)),
prec(PREC.AT_OP, "@"),
// Binary operators
...IN_MATCH_OPS,
"when",
"::",
"|",
"=",
...OR_OPS,
...AND_OPS,
...COMP_OPS,
...REL_OPS,
...ARROW_OPS,
"in",
$._not_in,
"^^",
...CONCAT_OPS,
...MULT_OPS,
"**",
"->",
"."
),
dot: ($) =>
prec(
PREC.DOT_OP,
seq(choice($._expression), ".", choice($.alias, $.tuple))
),
keywords: ($) => sep1($.pair, ","),
pair: ($) => seq($.keyword, $._expression),
keyword: ($) =>
seq(
// Tree-sitter doesn't consider ambiguities within individual
// tokens (in this case regexps). So both in [a] and [a: 1] it
// would always parse "a" as the same node (based on whether
// $.identifier or $.atom_literal) is listed first in the rules.
// However, since identifiers and alias parts are valid atom
// literals, we can list them here, in which case the parser will
// consider all paths and pick the valid one.
// Also see https://github.com/tree-sitter/tree-sitter/issues/518
choice(
alias($._atom_word_literal, $.atom_literal),
alias($._atom_operator_literal, $.atom_literal),
alias($._keyword_special_literal, $.atom_literal),
alias($.identifier, $.atom_literal),
alias($.unused_identifier, $.atom_literal),
alias($.special_identifier, $.atom_literal),
alias($._alias_single, $.atom_literal),
alias(choice(...RESERVED_WORD_TOKENS), $.atom_literal),
$._quoted_i_double,
$._quoted_i_single
),
$._keyword_end
),
list: ($) => seq("[", optional($._items_with_trailing_separator), "]"),
tuple: ($) => seq("{", optional($._items_with_trailing_separator), "}"),
bitstring: ($) =>
seq("<<", optional($._items_with_trailing_separator), ">>"),
map: ($) => seq("%", optional($.struct), "{", optional($.map_content), "}"),
struct: ($) =>
prec.left(
choice(
$.alias,
$.atom,
$._identifier,
$.unary_operator,
$.dot,
alias($._parenthesised_call, $.call)
)
),
map_content: ($) => $._items_with_trailing_separator,
_items_with_trailing_separator: ($) =>
seq(
choice(
seq(sep1($._expression, ","), optional(seq(",", $.keywords))),
$.keywords
),
optional(",")
),
char: ($) => /\?(.|\\.)/,
boolean: ($) => choice("true", "false"),
nil: ($) => "nil",
call: ($) =>
choice(
$._local_call_with_arguments,
$._parenthesised_local_call_with_arguments,
$._local_call_without_arguments,
$._remote_call,
$._parenthesised_remote_call,
$._anonymous_call,
$._call_on_call
),
_parenthesised_call: ($) =>
choice(
$._parenthesised_local_call_with_arguments,
$._parenthesised_remote_call,
$._anonymous_call,
$._call_on_call
),
_call_on_call: ($) =>
prec.left(
seq(
alias(
choice(
$._parenthesised_local_call_with_arguments,
$._parenthesised_remote_call,
$._anonymous_call
),
$.call
),
// arguments in parentheses
// alias($._local_or_remote_arguments, $.arguments),
// TODO just make nonimmediate/immediate in the name
alias($._anonymous_arguments, $.arguments),
optional(seq(optional($._newline_before_do), $.do_block))
)
),
_local_call_with_arguments: ($) =>
// Given `x + y` it can be interpreted either as a binary operator
// or a call with unary operator. This is an actual ambiguity, so
// we use dynamic precedence to penalize call
// prec.dynamic(
// TODO ideally we would penalize whitespace after unary op,
// so that x + y is binary op and x +y is unary op, to reflect
// Elixir ast
// -1,
prec.left(
seq(
$._identifier,
alias($._call_arguments, $.arguments),
// TODO include this in notes:
// We use external scanner for _newline_before_do because
// this way we can lookahead through any whitespace
// (especially newlines). We cannot simply use repeat("\n")
// and conflict with expression end, because this function
// rule has left precedence (so that do-end sticks to the outermost
// call), and thus expression end would always be preferred
optional(seq(optional($._newline_before_do), $.do_block))
// optional($.do_block)
)
// )
),
_parenthesised_local_call_with_arguments: ($) =>
// Given `x + y` it can be interpreted either as a binary operator
// or a call with unary operator. This is an actual ambiguity, so
// we use dynamic precedence to penalize call
// prec.dynamic(
// TODO ideally we would penalize whitespace after unary op,
// so that x + y is binary op and x +y is unary op, to reflect
// Elixir ast
// -1,
prec.left(
seq(
$._identifier,
alias($._parenthesised_call_arguments, $.arguments),
// TODO include this in notes:
// We use external scanner for _newline_before_do because
// this way we can lookahead through any whitespace
// (especially newlines). We cannot simply use repeat("\n")
// and conflict with expression end, because this function
// rule has left precedence (so that do-end sticks to the outermost
// call), and thus expression end would always be preferred
optional(seq(optional($._newline_before_do), $.do_block))
// optional($.do_block)
)
// )
),
_local_call_without_arguments: ($) =>
// We use lower precedence, so given `fun arg do end`
// we don't tokenize `arg` as a call
// we actually need a conflict because of `foo bar do end` vs `foo bar do: 1`
// prec(-1,
prec.dynamic(-1, seq($._identifier, $.do_block)),
// )
_remote_call: ($) =>
prec.left(
seq(
alias($._remote_dot, $.dot),
optional(alias($._call_arguments, $.arguments)),
optional(seq(optional($._newline_before_do), $.do_block))
// optional($.do_block)
)
),
_parenthesised_remote_call: ($) =>
prec.left(
seq(
alias($._remote_dot, $.dot),
alias($._parenthesised_call_arguments, $.arguments),
optional(seq(optional($._newline_before_do), $.do_block))
// optional($.do_block)
)
),
_remote_dot: ($) =>
prec(
PREC.DOT_OP,
seq(
$._expression,
".",
// TODO can also be string, anything else?
// compare with the other parser
// TODO we don't want to support heredoc though
choice(
$._identifier,
alias(choice(...RESERVED_WORD_TOKENS), $.identifier),
$.operator_identifier,
alias($._quoted_i_double, $.string),
alias($._quoted_i_single, $.charlist)
)
)
),
_parenthesised_call_arguments: ($) =>
seq(token.immediate("("), optional($._call_arguments), ")"),
_anonymous_call: ($) =>
seq(
alias($._anonymous_dot, $.dot),
alias($._anonymous_arguments, $.arguments)
),
_anonymous_dot: ($) => prec(PREC.DOT_OP, seq($._expression, ".")),
_anonymous_arguments: ($) => seq("(", optional($._call_arguments), ")"),
_call_arguments: ($) =>
// Right precedence ensures that `fun1 fun2 x, y` is treated
// as `fun1(fun2(x, y))` and not `fun1(fun2(x), y)
prec.right(
seq(
choice(
seq(
sep1($._expression, ","),
optional(seq(",", $.keywords, optional(",")))
),
seq($.keywords, optional(","))
)
)
),
access_call: ($) =>
prec(
PREC.ACCESS,
seq($._expression, token.immediate("["), $._expression, "]")
),
do_block: ($) =>
seq(
sugarBlock($, "do"),
repeat(
choice($.after_block, $.rescue_block, $.catch_block, $.else_block)
),
"end"
),
after_block: ($) => sugarBlock($, "after"),
rescue_block: ($) => sugarBlock($, "rescue"),
catch_block: ($) => sugarBlock($, "catch"),
else_block: ($) => sugarBlock($, "else"),
// Specify right precedence, so that we consume as much as we can
stab_clause: ($) =>
prec.right(seq(optional($._stab_clause_left), "->", optional($.body))),
_stab_clause_left: ($) =>
choice(
// Note the first option has higher precedence, TODO clarify
alias($._stab_clause_parentheses_arguments, $.arguments),
// TODO naming/cleanup
alias(
$._stab_clause_parentheses_arguments_with_guard,
$.binary_operator
),
alias($._stab_clause_arguments, $.arguments),
alias($._stab_clause_arguments_with_guard, $.binary_operator)
),
_stab_clause_parentheses_arguments: ($) =>
// `(1) ->` may be interpreted either as block argument
// or argument in parentheses and we use dynamic precedence
// to favour the latter
prec(
PREC.WHEN_OP,
prec.dynamic(1, seq("(", optional($._stab_clause_arguments), ")"))
),
_stab_clause_parentheses_arguments_with_guard: ($) =>
seq(
alias($._stab_clause_parentheses_arguments, $.arguments),
"when",
$._expression
),
// Stab clause arguments without parentheses, followed by a `when` guard
_stab_clause_arguments_with_guard: ($) =>
// `a when b ->` may be interpreted either such that `a when b` is an argument
// or a guard binary operator with argument `a` and right operand `b`,
// we use dynamic precedence to favour the latter
prec.dynamic(
1,
seq(alias($._stab_clause_arguments, $.arguments), "when", $._expression)
),
_stab_clause_arguments: ($) =>
// TODO this is a variant of _items_with_trailing_separator, cleanup
choice(
seq(
sep1($._stab_clause_arguments_expression, ","),
optional(seq(",", $.keywords))
),
$.keywords
),
_stab_clause_arguments_expression: ($) =>
// Note here we use the same precedence as when operator,
// so we get a conflict and resolve it dynamically
prec(PREC.WHEN_OP, $._expression),
body: ($) =>
seq(
choice(
seq($._terminator, sep($._expression, $._terminator)),
sep1($._expression, $._terminator)
),
optional($._terminator)
),
anonymous_function: ($) =>
seq(
"fn",
optional($._terminator),
sep1($.stab_clause, $._terminator),
"end"
),
// A comment may be anywhere, we give it a lower precedence,
// so it doesn't intercept sequences such as interpolation
comment: ($) => token(prec(-1, seq("#", /.*/))),
}, },
}); });
// Builds a rule matching one or more occurrences of `rule`, with
// `separator` between each two consecutive occurrences.
function sep1(rule, separator) {
  const separatedRule = seq(separator, rule);
  return seq(rule, repeat(separatedRule));
}
// Builds a rule matching zero or more occurrences of `rule`, with
// `separator` between each two consecutive occurrences.
function sep(rule, separator) {
  const nonEmptySequence = sep1(rule, separator);
  return optional(nonEmptySequence);
}
// Builds a unary operator rule: an optional $._before_unary_op marker,
// the operator (exposed as the "operator" field) and its operand.
//
// `assoc` is the precedence function (such as `prec`) applied with
// `precedence`; `right` is the operand rule and falls back to
// $._expression when omitted.
function unaryOp($, assoc, precedence, operator, right = null) {
  const spacingMarker = optional($._before_unary_op);
  const operatorField = field("operator", operator);
  const operand = right || $._expression;

  // TODO clarify, we use lower precedence, so given `x + y`,
  // which can be interpreted as either `x + y` or `x(+y)`
  // we favour the former. The only exception is when
  // _before_unary_op matches which forces the latter interpretation
  // in case like `x +y`
  const penalized = prec.dynamic(-1, seq(spacingMarker, operatorField, operand));

  return assoc(precedence, penalized);
}
// Builds a binary operator rule with "left", "operator" and "right"
// fields.
//
// `assoc` is the precedence function (such as `prec.left` or
// `prec.right`) applied with `precedence`; `left` and `right` are the
// operand rules and each falls back to $._expression when omitted.
function binaryOp($, assoc, precedence, operator, left = null, right = null) {
  const leftField = field("left", left || $._expression);
  const operatorField = field("operator", operator);
  const rightField = field("right", right || $._expression);

  return assoc(precedence, seq(leftField, operatorField, rightField));
}
// Builds the rule for a block section opened by the `start` keyword
// (such as "do" or "after"): the keyword, an optional terminator and an
// optional body. The body is either a list of stab clauses or a list of
// expressions with an optional trailing terminator.
function sugarBlock($, start) {
  const leadingTerminator = optional($._terminator);
  const stabClauses = sep1(choice($.stab_clause), $._terminator);
  const expressions = seq(
    sep1(choice($._expression), $._terminator),
    optional($._terminator)
  );

  return seq(start, leadingTerminator, optional(choice(stabClauses, expressions)));
}
// Defines a pair of rules for content quoted with the `start` and `end`
// delimiters:
//
//   * _quoted_i_{name} - content with interpolation and escape sequences
//   * _quoted_{name} - content without interpolation
//
// The content itself comes from the $._quoted_content_i_{name} and
// $._quoted_content_{name} rules respectively, aliased to $.string_content.
function defineQuoted(start, end, name) {
  // TODO rename the externals to _content
  const interpolatedContentName = `_quoted_content_i_${name}`;
  const plainContentName = `_quoted_content_${name}`;

  const delimited = (...contentRules) =>
    seq(start, repeat(choice(...contentRules)), end);

  return {
    [`_quoted_i_${name}`]: ($) =>
      delimited(
        alias($[interpolatedContentName], $.string_content),
        $.interpolation,
        $.escape_sequence
      ),

    [`_quoted_${name}`]: ($) =>
      delimited(
        alias($[plainContentName], $.string_content),
        // It's always possible to escape the end delimiter
        $.escape_sequence
      ),
  };
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

403241
src/parser.c

File diff suppressed because it is too large Load Diff

844
src/scanner.cc Normal file
View File

@ -0,0 +1,844 @@
#include <tree_sitter/parser.h>
namespace {
// Tokens produced by this external scanner. The entries are listed in
// the same order as the `externals` array in grammar.js.
// NOTE(review): tree-sitter presumably matches external tokens by
// position, so the two lists need to stay in sync - confirm against the
// tree-sitter external scanner documentation.
enum TokenType {
// TODO add a note that all QUOTE_* tokens are mutually exclusive
// i.e. the valid_symbols array contains at most one truthy of these
//
// Quoted content with interpolation support
QUOTED_CONTENT_I_SINGLE,
QUOTED_CONTENT_I_DOUBLE,
QUOTED_CONTENT_I_HEREDOC_SINGLE,
QUOTED_CONTENT_I_HEREDOC_DOUBLE,
QUOTED_CONTENT_I_PARENTHESIS,
QUOTED_CONTENT_I_CURLY,
QUOTED_CONTENT_I_SQUARE,
QUOTED_CONTENT_I_ANGLE,
QUOTED_CONTENT_I_BAR,
QUOTED_CONTENT_I_SLASH,
// Quoted content without interpolation support
QUOTED_CONTENT_SINGLE,
QUOTED_CONTENT_DOUBLE,
QUOTED_CONTENT_HEREDOC_SINGLE,
QUOTED_CONTENT_HEREDOC_DOUBLE,
QUOTED_CONTENT_PARENTHESIS,
QUOTED_CONTENT_CURLY,
QUOTED_CONTENT_SQUARE,
QUOTED_CONTENT_ANGLE,
QUOTED_CONTENT_BAR,
QUOTED_CONTENT_SLASH,
// Other tokens
KEYWORD_SPECIAL_LITERAL,
ATOM_START,
KEYWORD_END,
NEWLINE_BEFORE_DO,
NEWLINE_BEFORE_BINARY_OP,
NEWLINE_BEFORE_COMMENT,
BEFORE_UNARY_OP,
NOT_IN
};
bool quoted_token_type(const bool* valid_symbols, TokenType& token_type) {
// Quoted symbols are mutually exclusive and only one should
// be valid at a time. If multiple are valid it means we parse
// an arbitrary code outside quotes, in which case we don't
// want to tokenize it as quoted content.
if (valid_symbols[QUOTED_CONTENT_I_SINGLE] && valid_symbols[QUOTED_CONTENT_I_DOUBLE]) {
return false;
}
if (valid_symbols[QUOTED_CONTENT_I_SINGLE]) {
token_type = QUOTED_CONTENT_I_SINGLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_DOUBLE]) {
token_type = QUOTED_CONTENT_I_DOUBLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_HEREDOC_SINGLE]) {
token_type = QUOTED_CONTENT_I_HEREDOC_SINGLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_HEREDOC_DOUBLE]) {
token_type = QUOTED_CONTENT_I_HEREDOC_DOUBLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_PARENTHESIS]) {
token_type = QUOTED_CONTENT_I_PARENTHESIS;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_CURLY]) {
token_type = QUOTED_CONTENT_I_CURLY;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_SQUARE]) {
token_type = QUOTED_CONTENT_I_SQUARE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_ANGLE]) {
token_type = QUOTED_CONTENT_I_ANGLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_BAR]) {
token_type = QUOTED_CONTENT_I_BAR;
return true;
}
if (valid_symbols[QUOTED_CONTENT_I_SLASH]) {
token_type = QUOTED_CONTENT_I_SLASH;
return true;
}
if (valid_symbols[QUOTED_CONTENT_SINGLE]) {
token_type = QUOTED_CONTENT_SINGLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_DOUBLE]) {
token_type = QUOTED_CONTENT_DOUBLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_HEREDOC_SINGLE]) {
token_type = QUOTED_CONTENT_HEREDOC_SINGLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_HEREDOC_DOUBLE]) {
token_type = QUOTED_CONTENT_HEREDOC_DOUBLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_PARENTHESIS]) {
token_type = QUOTED_CONTENT_PARENTHESIS;
return true;
}
if (valid_symbols[QUOTED_CONTENT_CURLY]) {
token_type = QUOTED_CONTENT_CURLY;
return true;
}
if (valid_symbols[QUOTED_CONTENT_SQUARE]) {
token_type = QUOTED_CONTENT_SQUARE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_ANGLE]) {
token_type = QUOTED_CONTENT_ANGLE;
return true;
}
if (valid_symbols[QUOTED_CONTENT_BAR]) {
token_type = QUOTED_CONTENT_BAR;
return true;
}
if (valid_symbols[QUOTED_CONTENT_SLASH]) {
token_type = QUOTED_CONTENT_SLASH;
return true;
}
return false;
}
// Returns the character that closes the given quoted content token.
// For heredocs this is the single quote character that the closing
// delimiter is made of (see quoted_delimiter_length for the number of
// repetitions).
//
// Only quoted content tokens are expected here; any other token has no
// end delimiter and yields 0.
int32_t quoted_end_delimiter(TokenType token_type) {
  switch (token_type) {
    case QUOTED_CONTENT_I_SINGLE:
    case QUOTED_CONTENT_SINGLE:
    case QUOTED_CONTENT_I_HEREDOC_SINGLE:
    case QUOTED_CONTENT_HEREDOC_SINGLE:
      return '\'';
    case QUOTED_CONTENT_I_DOUBLE:
    case QUOTED_CONTENT_DOUBLE:
    case QUOTED_CONTENT_I_HEREDOC_DOUBLE:
    case QUOTED_CONTENT_HEREDOC_DOUBLE:
      return '\"';
    case QUOTED_CONTENT_I_PARENTHESIS:
    case QUOTED_CONTENT_PARENTHESIS:
      return ')';
    case QUOTED_CONTENT_I_CURLY:
    case QUOTED_CONTENT_CURLY:
      return '}';
    case QUOTED_CONTENT_I_SQUARE:
    case QUOTED_CONTENT_SQUARE:
      return ']';
    case QUOTED_CONTENT_I_ANGLE:
    case QUOTED_CONTENT_ANGLE:
      return '>';
    case QUOTED_CONTENT_I_BAR:
    case QUOTED_CONTENT_BAR:
      return '|';
    case QUOTED_CONTENT_I_SLASH:
    case QUOTED_CONTENT_SLASH:
      return '/';
    default:
      // Not reachable for quoted content tokens. We return a value
      // instead of calling __builtin_unreachable(), because that
      // builtin is specific to GCC/Clang and fails to compile with
      // other compilers (such as MSVC)
      return 0;
  }
}
// Returns the number of characters in the closing delimiter of the
// given quoted content token: 3 for heredocs and 1 for everything else.
uint8_t quoted_delimiter_length(TokenType token_type) {
  const bool is_heredoc =
    token_type == QUOTED_CONTENT_I_HEREDOC_SINGLE ||
    token_type == QUOTED_CONTENT_I_HEREDOC_DOUBLE ||
    token_type == QUOTED_CONTENT_HEREDOC_SINGLE ||
    token_type == QUOTED_CONTENT_HEREDOC_DOUBLE;

  if (is_heredoc) {
    return 3;
  }

  return 1;
}
// Returns true if the given token is one of the QUOTED_CONTENT_I_*
// tokens, that is, quoted content that supports interpolation.
bool quoted_is_interpol(TokenType token_type) {
  // The QUOTED_CONTENT_I_* tokens form a contiguous range in TokenType,
  // from QUOTED_CONTENT_I_SINGLE up to QUOTED_CONTENT_I_SLASH
  return QUOTED_CONTENT_I_SINGLE <= token_type &&
    token_type <= QUOTED_CONTENT_I_SLASH;
}
// Returns true for any whitespace character, including line breaks.
bool is_whitespace(int32_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\v':
    case '\n':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
  }
}
// Returns true for whitespace characters that stay within the current
// line: space, horizontal tab and vertical tab.
bool is_inline_whitespace(int32_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\v':
      return true;
    default:
      return false;
  }
}
// Returns true only for the line feed character.
// TODO what about these weird \f \r
bool is_newline(int32_t c) {
  const int32_t line_feed = '\n';
  return c == line_feed;
}
// Moves the lexer to the next character, passing `false` as the skip
// flag of TSLexer::advance.
void advance(TSLexer* lexer) {
  const bool skip_character = false;
  lexer->advance(lexer, skip_character);
}
// Moves the lexer to the next character, passing `true` as the skip
// flag of TSLexer::advance.
void skip(TSLexer *lexer) {
  const bool skip_character = true;
  lexer->advance(lexer, skip_character);
}
// Finishes scanning the ATOM_START token and returns whether the token
// should be accepted.
//
// The first ':' is already scanned and parser advanced. The token ends
// right after that colon; the characters after it are only looked at.
bool finish_atom_start(TSLexer* lexer) {
  lexer->result_symbol = ATOM_START;
  lexer->mark_end(lexer);

  if (lexer->lookahead != ':') {
    // The colon must be directly followed by the atom content
    return !is_whitespace(lexer->lookahead);
  }

  // Of the sequences starting with a double colon, only ::: is accepted
  advance(lexer);
  return lexer->lookahead == ':';
}
// Returns true if the lexer is at a ':' that is followed by whitespace.
// When the current character is a ':' the lexer is advanced past it.
bool is_keyword_end(TSLexer* lexer) {
  if (lexer->lookahead != ':') {
    return false;
  }

  advance(lexer);
  return is_whitespace(lexer->lookahead);
}
// Finishes scanning the KEYWORD_SPECIAL_LITERAL token: the token ends at
// the current position and is accepted only if a keyword end (a ':'
// followed by whitespace) comes right after it.
bool finish_keyword(TSLexer* lexer) {
  lexer->result_symbol = KEYWORD_SPECIAL_LITERAL;
  // Mark the end before looking ahead, so the ':' is not part of the token
  lexer->mark_end(lexer);

  const bool followed_by_keyword_end = is_keyword_end(lexer);
  return followed_by_keyword_end;
}
// Returns true for the ASCII decimal digits 0-9.
bool is_digit(int32_t c) {
  if (c < '0') {
    return false;
  }

  return c <= '9';
}
// Looks at the characters following an operator and returns false when
// they indicate that the operator is used as a keyword (`+: 1`) or as
// an operator identifier with arity (`+/2`), true otherwise.
// Note that the lexer is advanced while looking ahead.
bool is_operator_end(TSLexer* lexer) {
  // Keyword
  if (lexer->lookahead == ':') {
    return !is_keyword_end(lexer);
  }

  while (is_inline_whitespace(lexer->lookahead)) {
    advance(lexer);
  }

  if (lexer->lookahead != '/') {
    return true;
  }

  // Operator identifier with arity
  advance(lexer);

  while (is_whitespace(lexer->lookahead)) {
    advance(lexer);
  }

  return !is_digit(lexer->lookahead);
}
// Non-whitespace characters that may directly follow a word token
// without being a part of it. Each character is listed exactly once.
const char TOKEN_TERMINATORS[] = {
  // Operator starts
  '@', '.', '+', '-', '^', '*', '/', '<', '>', '|', '~', '=', '&', '\\', '%',
  // Delimiters
  '{', '}', '[', ']', '(', ')', '"', '\'',
  // Separators
  ',', ';',
  // Comment
  '#'
};

// Returns true if the given character ends a word token, that is, if it
// is either whitespace or one of TOKEN_TERMINATORS.
//
// Note: this is a heuristic as we only use this to distinguish word
// operators and we don't want to include complex Unicode ranges.
bool is_token_end(int32_t c) {
  for (unsigned int i = 0; i < sizeof(TOKEN_TERMINATORS); i++) {
    if (c == TOKEN_TERMINATORS[i]) {
      return true;
    }
  }

  return is_whitespace(c);
}
// Main routine of the external scanner. Looks at the upcoming characters
// and tries to recognise one of the external tokens that are marked as
// acceptable in valid_symbols. Returns true when a token was recognised
// (its type is stored in lexer->result_symbol, either directly here or
// presumably by the finish_* helpers) and false when no token applies.
//
// The checks are attempted in the following order:
//
//   1. quoted content, if quoted_token_type reports such a token
//   2. ":" right at the current position (keyword end or atom start)
//   3. "+" and "-" preceded by inline whitespace (BEFORE_UNARY_OP)
//   4. the "not in" word operator (NOT_IN)
//   5. newline followed by a comment, "do" or a binary operator
//   6. special keyword literals and an atom start
//
// Note that lexer->mark_end fixes the end of the resulting token, so
// characters consumed with advance after the last mark_end call serve
// only as lookahead and are not part of the token.
bool scan(TSLexer* lexer, const bool* valid_symbols) {
// Presumably filled in by quoted_token_type when it returns true
TokenType token_type;
bool is_quoted_symbol = quoted_token_type(valid_symbols, token_type);
// Quoted content, which matches any character except for close
// delimiters, escapes and interpolations
if (is_quoted_symbol) {
// TODO naming
// TODO move all of this into a separate function like scan_quoted_content
int32_t end_delimiter = quoted_end_delimiter(token_type);
bool supports_interpol = quoted_is_interpol(token_type);
uint8_t delimiter_length = quoted_delimiter_length(token_type);
lexer->result_symbol = token_type;
// has_content is false only during the first iteration, so stopping
// before any character has been consumed yields no token at all
for (bool has_content = false; true; has_content = true) {
// Everything consumed so far belongs to the content, whatever we
// look at next (delimiter, interpolation, escape) may not
lexer->mark_end(lexer);
if (lexer->lookahead == end_delimiter) {
// Count consecutive delimiter characters, up to delimiter_length
uint8_t length = 1;
while (length < delimiter_length) {
advance(lexer);
if (lexer->lookahead == end_delimiter) {
length++;
} else {
break;
}
}
// A complete close delimiter ends the content. Otherwise the
// characters consumed above are treated as regular content and
// get included by mark_end in the next iteration
if (length == delimiter_length) {
return has_content;
}
} else {
switch (lexer->lookahead) {
case '#':
advance(lexer);
// "#{" starts an interpolation, so the content ends before "#"
if (supports_interpol && lexer->lookahead == '{') {
return has_content;
}
break;
case '\\':
if (supports_interpol) {
// The content ends before the backslash (presumably the escape
// sequence is then matched by the grammar itself)
return has_content;
} else {
advance(lexer);
// Without interpolation support the content ends before the
// backslash only if it is followed by the end delimiter,
// otherwise the backslash is regular content
if (lexer->lookahead == end_delimiter) {
return has_content;
}
}
break;
case '\0':
// Lookahead of 0, presumably end of input, there is no valid
// content token in such case
return false;
default:
advance(lexer);
}
}
}
// Not reached, the loop above is only ever left through return
return false;
}
// ":" directly at the current position (no whitespace skipped yet),
// either the end of a keyword or the start of an atom
if (lexer->lookahead == ':') {
if (valid_symbols[ATOM_START] || valid_symbols[KEYWORD_END]) {
advance(lexer);
if (is_whitespace(lexer->lookahead)) {
// ":" followed by whitespace can only be a keyword end
if (valid_symbols[KEYWORD_END]) {
lexer->result_symbol = KEYWORD_END;
return true;
}
} else {
// ":" followed by a non-whitespace character may start an atom
if (valid_symbols[ATOM_START]) {
return finish_atom_start(lexer);
}
}
return false;
}
}
// Skip leading inline whitespace and remember whether there was any,
// this is significant for telling unary +/- from the binary ones
bool skipped_whitespace = false;
while (is_inline_whitespace(lexer->lookahead)) {
skipped_whitespace = true;
skip(lexer);
}
// TODO move this below together with other functions on this level
// "+" preceded by whitespace and directly followed by its operand
if (lexer->lookahead == '+') {
if (skipped_whitespace && valid_symbols[BEFORE_UNARY_OP]) {
// Mark the token end before the operator, so that the operator
// character itself is not included in BEFORE_UNARY_OP
lexer->mark_end(lexer);
advance(lexer);
// Ignore "++", "+:" and "+/"
if (lexer->lookahead == '+' || lexer->lookahead == ':' || lexer->lookahead == '/') {
return false;
}
// Ignore "+" followed by whitespace
if (is_whitespace(lexer->lookahead)) {
return false;
}
lexer->result_symbol = BEFORE_UNARY_OP;
return true;
}
}
// "-" preceded by whitespace and directly followed by its operand
if (lexer->lookahead == '-') {
if (skipped_whitespace && valid_symbols[BEFORE_UNARY_OP]) {
// Same as for "+", the token ends before the operator
lexer->mark_end(lexer);
advance(lexer);
// Ignore "--", "->", "-:" and "-/"
if (lexer->lookahead == '-' || lexer->lookahead == '>' || lexer->lookahead == ':' || lexer->lookahead == '/') {
return false;
}
// Ignore "-" followed by whitespace
if (is_whitespace(lexer->lookahead)) {
return false;
}
lexer->result_symbol = BEFORE_UNARY_OP;
return true;
}
}
// not in
//
// Matches "not", any inline whitespace, then "in" followed by a token
// end. Anything else starting with "n" results in no token.
// NOTE(review): this branch does not consult valid_symbols[NOT_IN],
// confirm that this is intended
if (lexer->lookahead == 'n') {
lexer->result_symbol = NOT_IN;
advance(lexer);
if (lexer->lookahead == 'o') {
advance(lexer);
if (lexer->lookahead == 't') {
advance(lexer);
// The whitespace is consumed with advance rather than skip,
// presumably so that it becomes a part of the token
while (is_inline_whitespace(lexer->lookahead)) {
advance(lexer);
}
if (lexer->lookahead == 'i') {
advance(lexer);
if (lexer->lookahead == 'n') {
advance(lexer);
return is_token_end(lexer->lookahead);
}
}
}
}
return false;
}
// TODO can be a separate function
// Newline tokens, the token type depends on what follows the newline
if (is_newline(lexer->lookahead) && (
valid_symbols[NEWLINE_BEFORE_DO] ||
valid_symbols[NEWLINE_BEFORE_BINARY_OP] ||
valid_symbols[NEWLINE_BEFORE_COMMENT])) {
advance(lexer);
while (is_whitespace(lexer->lookahead)) {
advance(lexer);
}
// Note we include all the whitespace after newline, so that the
// parser doesn't have to go through it again
lexer->mark_end(lexer);
// From this point on we only look ahead, the characters consumed
// below are not a part of the newline token
// NOTE(review): this is returned without consulting
// valid_symbols[NEWLINE_BEFORE_COMMENT], confirm that this is intended
if (lexer->lookahead == '#') {
lexer->result_symbol = NEWLINE_BEFORE_COMMENT;
return true;
}
// do
if (valid_symbols[NEWLINE_BEFORE_DO] && lexer->lookahead == 'd') {
lexer->result_symbol = NEWLINE_BEFORE_DO;
advance(lexer);
if (lexer->lookahead == 'o') {
advance(lexer);
return is_token_end(lexer->lookahead);
}
return false;
}
// Each branch below matches the text of a single binary operator and
// then defers to is_operator_end for the final decision. Branches
// that do not return fall through to the "return false" at the end
// of the enclosing newline block
if (valid_symbols[NEWLINE_BEFORE_BINARY_OP] ) {
lexer->result_symbol = NEWLINE_BEFORE_BINARY_OP;
// &&, &&&
if (lexer->lookahead == '&') {
advance(lexer);
if (lexer->lookahead == '&') {
advance(lexer);
if (lexer->lookahead == '&') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
}
// =, ==, ===, =~, =>
} else if (lexer->lookahead == '=') {
advance(lexer);
if (lexer->lookahead == '=') {
advance(lexer);
if (lexer->lookahead == '=') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
} else if (lexer->lookahead == '~') {
advance(lexer);
return is_operator_end(lexer);
} else if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
// ::
} else if (lexer->lookahead == ':') {
advance(lexer);
if (lexer->lookahead == ':') {
advance(lexer);
// Ignore ::: atom
if (lexer->lookahead == ':') return false;
return is_operator_end(lexer);
}
// ++, +++
} else if (lexer->lookahead == '+') {
advance(lexer);
if (lexer->lookahead == '+') {
advance(lexer);
if (lexer->lookahead == '+') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
}
// --, ---, ->
} else if (lexer->lookahead == '-') {
advance(lexer);
if (lexer->lookahead == '-') {
advance(lexer);
if (lexer->lookahead == '-') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
} else if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
}
// <, <=, <-, <>, <~, <~>, <|>, <<<, <<~
} else if (lexer->lookahead == '<') {
advance(lexer);
if (lexer->lookahead == '=' ||
lexer->lookahead == '-' ||
lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
} else if (lexer->lookahead == '~') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
} else if (lexer->lookahead == '|') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
}
} else if (lexer->lookahead == '<') {
advance(lexer);
if (lexer->lookahead == '<' ||
lexer->lookahead == '~') {
advance(lexer);
return is_operator_end(lexer);
}
} else {
return is_operator_end(lexer);
}
// >, >=, >>>
} else if (lexer->lookahead == '>') {
advance(lexer);
if (lexer->lookahead == '=') {
advance(lexer);
return is_operator_end(lexer);
} else if (lexer->lookahead == '>') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
}
} else {
return is_operator_end(lexer);
}
// ^^^
} else if (lexer->lookahead == '^') {
advance(lexer);
if (lexer->lookahead == '^') {
advance(lexer);
if (lexer->lookahead == '^') {
advance(lexer);
return is_operator_end(lexer);
}
}
// !=, !==
} else if (lexer->lookahead == '!') {
advance(lexer);
if (lexer->lookahead == '=') {
advance(lexer);
if (lexer->lookahead == '=') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
}
// ~>, ~>>
} else if (lexer->lookahead == '~') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
}
// |, ||, |||, |>
} else if (lexer->lookahead == '|') {
advance(lexer);
if (lexer->lookahead == '|') {
advance(lexer);
if (lexer->lookahead == '|') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
} else if (lexer->lookahead == '>') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
// *, **
} else if (lexer->lookahead == '*') {
advance(lexer);
if (lexer->lookahead == '*') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
// /, //
} else if (lexer->lookahead == '/') {
advance(lexer);
if (lexer->lookahead == '/') {
advance(lexer);
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
// ., ..
} else if (lexer->lookahead == '.') {
advance(lexer);
if (lexer->lookahead == '.') {
advance(lexer);
// Ignore ... identifier
if (lexer->lookahead == '.') return false;
return is_operator_end(lexer);
} else {
return is_operator_end(lexer);
}
// \\ (double backslash)
} else if (lexer->lookahead == '\\') {
advance(lexer);
if (lexer->lookahead == '\\') {
advance(lexer);
return is_operator_end(lexer);
}
// when
//
// For word operators we additionally require a token end, so that
// identifiers merely starting with the word are not matched
} else if (lexer->lookahead == 'w') {
advance(lexer);
if (lexer->lookahead == 'h') {
advance(lexer);
if (lexer->lookahead == 'e') {
advance(lexer);
if (lexer->lookahead == 'n') {
advance(lexer);
return is_token_end(lexer->lookahead) && is_operator_end(lexer);
}
}
}
// and
} else if (lexer->lookahead == 'a') {
advance(lexer);
if (lexer->lookahead == 'n') {
advance(lexer);
if (lexer->lookahead == 'd') {
advance(lexer);
return is_token_end(lexer->lookahead) && is_operator_end(lexer);
}
}
// or
} else if (lexer->lookahead == 'o') {
advance(lexer);
if (lexer->lookahead == 'r') {
advance(lexer);
return is_token_end(lexer->lookahead) && is_operator_end(lexer);
}
// in
} else if (lexer->lookahead == 'i') {
advance(lexer);
if (lexer->lookahead == 'n') {
advance(lexer);
return is_token_end(lexer->lookahead) && is_operator_end(lexer);
}
// not in
} else if (lexer->lookahead == 'n') {
advance(lexer);
if (lexer->lookahead == 'o') {
advance(lexer);
if (lexer->lookahead == 't') {
advance(lexer);
while (is_inline_whitespace(lexer->lookahead)) {
advance(lexer);
}
if (lexer->lookahead == 'i') {
advance(lexer);
if (lexer->lookahead == 'n') {
advance(lexer);
return is_token_end(lexer->lookahead) && is_operator_end(lexer);
}
}
}
}
}
}
// Newline not followed by anything we have a token for
return false;
}
// Special literals that are allowed as keyword keys. Once the literal
// text is matched the decision is left to finish_keyword (presumably
// it verifies the keyword ending and sets the resulting token)
// ... ..//
if (lexer->lookahead == '.') {
if (valid_symbols[KEYWORD_SPECIAL_LITERAL]) {
advance(lexer);
if (lexer->lookahead == '.') {
advance(lexer);
if (lexer->lookahead == '.') {
advance(lexer);
return finish_keyword(lexer);
} else if (lexer->lookahead == '/') {
advance(lexer);
if (lexer->lookahead == '/') {
advance(lexer);
return finish_keyword(lexer);
}
}
}
}
// % %{}
} else if (lexer->lookahead == '%') {
if (valid_symbols[KEYWORD_SPECIAL_LITERAL]) {
advance(lexer);
if (lexer->lookahead == '{') {
advance(lexer);
if (lexer->lookahead == '}') {
advance(lexer);
return finish_keyword(lexer);
}
} else {
return finish_keyword(lexer);
}
}
// {}
} else if (lexer->lookahead == '{') {
if (valid_symbols[KEYWORD_SPECIAL_LITERAL]) {
advance(lexer);
if (lexer->lookahead == '}') {
advance(lexer);
return finish_keyword(lexer);
}
}
// <<>>
} else if (lexer->lookahead == '<') {
if (valid_symbols[KEYWORD_SPECIAL_LITERAL]) {
advance(lexer);
if (lexer->lookahead == '<') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
return finish_keyword(lexer);
}
}
}
}
// atom start
//
// Reached when ":" was not handled at the top of this function,
// for instance when it comes after the inline whitespace skipped above
} else if (lexer->lookahead == ':') {
if (valid_symbols[ATOM_START]) {
advance(lexer);
return finish_atom_start(lexer);
}
}
// None of the external tokens matched
return false;
}
// Entry points that tree-sitter expects an external scanner to provide.
// The scanner keeps no state between calls, so there is no payload to
// manage and nothing to serialize.
extern "C" {
  void* tree_sitter_elixir_external_scanner_create() {
    // No state, hence no payload object
    return nullptr;
  }

  bool tree_sitter_elixir_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols) {
    (void) payload;

    const bool recognized = scan(lexer, valid_symbols);
    return recognized;
  }

  unsigned tree_sitter_elixir_external_scanner_serialize(void* payload, char* buffer) {
    (void) payload;
    (void) buffer;

    // Nothing is written to the buffer
    return 0;
  }

  void tree_sitter_elixir_external_scanner_deserialize(void* payload, const char* buffer, unsigned length) {
    (void) payload;
    (void) buffer;
    (void) length;
  }

  void tree_sitter_elixir_external_scanner_destroy(void* payload) {
    (void) payload;
  }
}
}

View File

@ -91,11 +91,7 @@ does not match inside a string
(source (source
(string (string
(string_start) (string_content))
(string_content)
(string_end))
(string (string
(string_start)
(string_content) (string_content)
(interpolation (identifier)) (interpolation (identifier))))
(string_end)))

View File

@ -87,7 +87,7 @@ end
(call (call
(identifier) (identifier)
(arguments (arguments
(identifier))) (identifier))))
(do_block (do_block
(identifier)))) (identifier))))
@ -100,11 +100,47 @@ do
x x
end end
fun x
# comment
do
x
end
fun()
do
x
end
Mod.fun x
do
x
end
--- ---
(source (source
(call (call
(identifier) (identifier)
(arguments
(identifier))
(do_block
(identifier)))
(call
(identifier)
(arguments
(identifier))
(comment)
(do_block
(identifier)))
(call
(identifier)
(arguments)
(do_block
(identifier)))
(call
(dot
(alias)
(identifier))
(arguments (arguments
(identifier)) (identifier))
(do_block (do_block
@ -265,7 +301,7 @@ end
(do_block (do_block
(stab_clause (stab_clause
(arguments (arguments
(integer)) (identifier))
(body (body
(identifier) (identifier)
(identifier)))))) (identifier))))))
@ -318,8 +354,7 @@ end
(identifier) (identifier)
(list))) (list)))
(body (body
(atom (identifier))))))
(atom_literal)))))))
===================================== =====================================
stab clause / with guard / multiple arguments stab clause / with guard / multiple arguments
@ -344,8 +379,7 @@ end
(identifier) (identifier)
(list))) (list)))
(body (body
(atom (identifier))))))
(atom_literal)))))))
===================================== =====================================
stab clause / with guard / arguments in parentheses stab clause / with guard / arguments in parentheses
@ -370,8 +404,7 @@ end
(identifier) (identifier)
(list))) (list)))
(body (body
(atom (identifier))))))
(atom_literal)))))))
===================================== =====================================
stab clause / with guard / multiple guards stab clause / with guard / multiple guards
@ -392,16 +425,152 @@ end
(arguments (arguments
(identifier)) (identifier))
(binary_operator (binary_operator
(binary_operator
(identifier)
(integer))
(binary_operator
(identifier)
(integer))))
(body
(identifier))))))
=====================================
stab clause / edge cases / no stab
=====================================
foo do
a when a
end
foo do
([])
end
---
(source
(call
(identifier)
(do_block
(binary_operator
(identifier)
(identifier))))
(call
(identifier)
(do_block
(block
(list)))))
=====================================
stab clause / edge cases / "when" in arguments
=====================================
foo do
a when b, c when d == e -> 1
end
---
(source
(call
(identifier)
(do_block
(stab_clause
(binary_operator
(arguments
(binary_operator (binary_operator
(identifier) (identifier)
(identifier)) (identifier))
(identifier))
(binary_operator
(identifier)
(identifier)))
(body
(integer))))))
=====================================
stab clause / edge cases / block argument
=====================================
foo do
(x; y) -> 1
((x; y)) -> 1
end
---
(source
(call
(identifier)
(do_block
(stab_clause
(arguments
(block
(identifier)
(identifier)))
(body
(integer)))
(stab_clause
(arguments
(block
(identifier)
(identifier)))
(body
(integer))))))
=====================================
stab clause / edge cases / operator with lower precedence than "when"
=====================================
foo do
x <- y when x -> y
end
foo do
(x <- y) when x -> y
end
---
(source
(call
(identifier)
(do_block
(stab_clause
(arguments
(binary_operator
(identifier)
(binary_operator (binary_operator
(identifier) (identifier)
(identifier)))) (identifier))))
(body (body
(atom (identifier)))))
(atom_literal))))))) (call
(identifier)
(do_block
(stab_clause
(binary_operator
(arguments
(binary_operator
(identifier)
(identifier)))
(identifier))
(body
(identifier))))))
=====================================
stab clause / edge cases / empty
=====================================
fun do->end
---
(source
(call
(identifier)
(do_block
(stab_clause))))
===================================== =====================================
pattern matching pattern matching
@ -424,8 +593,9 @@ end
(identifier) (identifier)
(identifier)))) (identifier))))
(body (body
(atom (tuple
(atom_literal))))))) (identifier)
(identifier)))))))
===================================== =====================================
child blocks / after child blocks / after
@ -578,3 +748,33 @@ end
(identifier)) (identifier))
(body (body
(identifier))))))) (identifier)))))))
=====================================
child blocks / keyword pattern with child block start token
=====================================
fun do
x
after
after
after: 1 -> y
end
---
(source
(call
(identifier)
(do_block
(identifier)
(after_block)
(after_block
(stab_clause
(arguments
(keywords
(pair
(keyword
(atom_literal))
(integer))))
(body
(identifier)))))))

View File

@ -5,13 +5,76 @@ operator with arity (valid and supported by IEx.Helpers.h)
::/2 ::/2
@ / 1 @ / 1
& / 1 & / 1
not / 1
not in / 2
* / 2
h +/2
--- ---
(source (source
(binary_operator
(operator_identifier)
(integer))
(binary_operator
(operator_identifier)
(integer))
(binary_operator
(operator_identifier)
(integer))
(binary_operator
(operator_identifier)
(integer))
(binary_operator
(operator_identifier)
(integer))
(binary_operator
(operator_identifier)
(integer))
(call
(identifier)
(arguments
(binary_operator
(operator_identifier)
(integer)))))
=====================================
stab and slash ambiguity
=====================================
(-> / 2)
(-> / / 2)
---
(source
(block
(binary_operator (binary_operator
(operator_identifier) (operator_identifier)
(integer))) (integer)))
(block
(stab_clause
(body
(binary_operator
(operator_identifier)
(integer))))))
=====================================
unary operator and slash ambiguity
=====================================
& / 2
& / / 2
---
(source
(binary_operator
(operator_identifier)
(integer))
(unary_operator
(binary_operator
(operator_identifier)
(integer))))
===================================== =====================================
map with identifiers map with identifiers
@ -54,16 +117,3 @@ def Mod.fun(x), do: 1
(keyword (keyword
(atom_literal)) (atom_literal))
(integer)))))) (integer))))))
=====================================
[error] arrow outside of map
=====================================
a => b
---
(source
(identifier)
(ERROR
(identifier)))

View File

@ -15,6 +15,7 @@ fn () -> 1 end
(integer)))) (integer))))
(anonymous_function (anonymous_function
(stab_clause (stab_clause
(arguments)
(body (body
(integer))))) (integer)))))
@ -178,8 +179,7 @@ end
(atom (atom
(atom_literal)))) (atom_literal))))
(body (body
(atom (boolean)))))
(atom_literal))))))
===================================== =====================================
with guard / one argument with guard / one argument
@ -201,8 +201,7 @@ end
(identifier) (identifier)
(list))) (list)))
(body (body
(atom (identifier)))))
(atom_literal))))))
===================================== =====================================
with guard / multiple arguments with guard / multiple arguments
@ -225,8 +224,7 @@ end
(identifier) (identifier)
(list))) (list)))
(body (body
(atom (identifier)))))
(atom_literal))))))
===================================== =====================================
with guard / arguments in parentheses with guard / arguments in parentheses
@ -249,8 +247,7 @@ end
(identifier) (identifier)
(list))) (list)))
(body (body
(atom (identifier)))))
(atom_literal))))))
===================================== =====================================
with guard / multiple guards with guard / multiple guards
@ -271,13 +268,12 @@ end
(binary_operator (binary_operator
(binary_operator (binary_operator
(identifier) (identifier)
(identifier)) (integer))
(binary_operator (binary_operator
(identifier) (identifier)
(identifier)))) (integer))))
(body (body
(atom (identifier)))))
(atom_literal))))))
===================================== =====================================
pattern matching pattern matching
@ -299,8 +295,9 @@ end
(identifier) (identifier)
(identifier)))) (identifier))))
(body (body
(atom (tuple
(atom_literal)))) (identifier)
(identifier))))
(stab_clause (stab_clause
(binary_operator (binary_operator
(arguments (arguments
@ -311,13 +308,8 @@ end
(keyword (keyword
(atom_literal)) (atom_literal))
(identifier)))))) (identifier))))))
(binary_operator
(binary_operator (binary_operator
(identifier) (identifier)
(identifier)) (integer)))
(binary_operator
(identifier)
(identifier))))
(body (body
(atom (integer)))))
(atom_literal))))))

View File

@ -94,3 +94,30 @@ trailing semicolon
(block (block
(integer) (integer)
(integer))) (integer)))
=====================================
stab clauses
=====================================
(x -> x; y -> y
z -> z)
---
(source
(block
(stab_clause
(arguments
(identifier))
(body
(identifier)))
(stab_clause
(arguments
(identifier))
(body
(identifier)))
(stab_clause
(arguments
(identifier))
(body
(identifier)))))

View File

@ -46,7 +46,9 @@ local call / arguments without parentheses
===================================== =====================================
fun a fun a
fun {}
fun [1, 2], option: true, other: 5 fun [1, 2], option: true, other: 5
fun +: 1
--- ---
@ -55,6 +57,45 @@ fun [1, 2], option: true, other: 5
(identifier) (identifier)
(arguments (arguments
(identifier))) (identifier)))
(call
(identifier)
(arguments
(tuple)))
(call
(identifier)
(arguments
(list
(integer)
(integer))
(keywords
(pair
(keyword
(atom_literal))
(boolean))
(pair
(keyword
(atom_literal))
(integer)))))
(call
(identifier)
(arguments
(keywords
(pair
(keyword
(atom_literal))
(integer))))))
=====================================
local call / arguments without parentheses / multiline
=====================================
fun [1, 2],
option: true,
other: 5
---
(source
(call (call
(identifier) (identifier)
(arguments (arguments
@ -92,7 +133,8 @@ outer_fun(inner_fun(a))
local call / nested without parentheses (right associativity) local call / nested without parentheses (right associativity)
===================================== =====================================
outer_fun inner_fun a outer_fun inner_fun a, b
outer_fun inner_fun do: 1
--- ---
@ -103,13 +145,24 @@ outer_fun inner_fun a
(call (call
(identifier) (identifier)
(arguments (arguments
(identifier)))))) (identifier)
(identifier)))))
(call
(identifier)
(arguments
(call
(identifier)
(arguments
(keywords
(pair
(keyword
(atom_literal))
(integer))))))))
===================================== =====================================
local call / precedence with operator local call / precedence with operator
===================================== =====================================
fun +1
outer_fun 1 + 1 outer_fun 1 + 1
1 + inner_fun 1 1 + inner_fun 1
outer_fun 1 + inner_fun 1 outer_fun 1 + inner_fun 1
@ -118,11 +171,6 @@ fun 1, 2 |> other_fun
--- ---
(source (source
(call
(identifier)
(arguments
(unary_operator
(integer))))
(call (call
(identifier) (identifier)
(arguments (arguments
@ -410,7 +458,15 @@ Mod.'fun'(a)
(call (call
(dot (dot
(alias) (alias)
(identifier)) (string
(string_content)))
(arguments
(identifier)))
(call
(dot
(alias)
(charlist
(string_content)))
(arguments (arguments
(identifier)))) (identifier))))
@ -520,11 +576,12 @@ Mod.outer_fun mid_fun inner_fun.(a)
(arguments (arguments
(call (call
(identifier) (identifier)
(arguments
(call (call
(dot (dot
(identifier)) (identifier))
(arguments (arguments
(identifier))))))) (identifier))))))))
===================================== =====================================
identifier call identifier call
@ -599,8 +656,8 @@ range call
(binary_operator (binary_operator
(integer) (integer)
(integer)) (integer))
(integer)))) (integer)))
(identifier))) (identifier))))
===================================== =====================================
multi-expression block call multi-expression block call
@ -774,14 +831,22 @@ map [key]
(identifier))))) (identifier)))))
===================================== =====================================
access syntax / precedence over dot call access syntax / precedence with dot call
===================================== =====================================
map.map[:key]
map[:mod].fun map[:mod].fun
--- ---
(source (source
(access_call
(call
(dot
(identifier)
(identifier)))
(atom
(atom_literal)))
(call (call
(dot (dot
(access_call (access_call
@ -790,6 +855,91 @@ map[:mod].fun
(atom_literal))) (atom_literal)))
(identifier)))) (identifier))))
=====================================
access syntax / precedence with operators
=====================================
-x[:key]
@x[:key]
&x[:key]
&1[:key]
---
(source
(unary_operator
(access_call
(identifier)
(atom
(atom_literal))))
(access_call
(unary_operator
(identifier))
(atom
(atom_literal)))
(unary_operator
(access_call
(identifier)
(atom
(atom_literal))))
(access_call
(unary_operator
(integer))
(atom
(atom_literal))))
=====================================
double parenthesised call
=====================================
fun()()
fun() ()
fun(1)(1)
Mod.fun()()
fun.()()
unquote(name)()
---
(source
(call
(call
(identifier)
(arguments))
(arguments))
(call
(call
(identifier)
(arguments))
(arguments))
(call
(call
(identifier)
(arguments
(integer)))
(arguments
(integer)))
(call
(call
(dot
(alias)
(identifier))
(arguments))
(arguments))
(call
(call
(dot
(identifier))
(arguments))
(arguments))
(call
(call
(identifier)
(arguments
(identifier)))
(arguments)))
===================================== =====================================
[error] leading argument separator [error] leading argument separator
===================================== =====================================
@ -804,3 +954,18 @@ fun(, a)
(arguments (arguments
(ERROR) (ERROR)
(identifier)))) (identifier))))
=====================================
[error] trailing argument separator
=====================================
fun(a,)
---
(source
(call
(identifier)
(arguments
(identifier)
(ERROR))))

View File

@ -23,6 +23,7 @@ anonymous function
(integer)))) (integer))))
(unary_operator (unary_operator
(call (call
(identifier)
(arguments (arguments
(unary_operator (unary_operator
(integer)) (integer))
@ -45,22 +46,22 @@ argument call
(call (call
(dot (dot
(unary_operator (unary_operator
(integer) (integer))
(identifier))))) (identifier))))
(unary_operator (unary_operator
(call (call
(dot (dot
(unary_operator (unary_operator
(integer) (integer))
(identifier))))) (identifier))))
(unary_operator (unary_operator
(call (call
(dot (dot
(unary_operator (unary_operator
(integer) (integer)))
(arguments (arguments
(unary_operator (unary_operator
(integer)))))))) (integer))))))
===================================== =====================================
remote MFA remote MFA

View File

@ -16,27 +16,21 @@ not arg
--- ---
(source (source
(unary_operator (unary_operator (identifier))
(identifier)) (unary_operator (identifier))
(unary_operator (unary_operator (identifier))
(identifier)) (unary_operator (identifier))
(unary_operator (unary_operator (identifier))
(identifier)) (unary_operator (identifier))
(unary_operator (unary_operator (identifier))
(identifier)) (unary_operator (identifier)))
(unary_operator
(identifier))
(unary_operator
(identifier))
(unary_operator
(identifier))
(unary_operator
(identifier)))
===================================== =====================================
binary left associative binary left associative
===================================== =====================================
a ** b ** c
a * b * c a * b * c
a / b / c a / b / c
@ -115,6 +109,7 @@ a \\ b \\ c
(binary_operator (binary_operator (identifier) (identifier)) (identifier)) (binary_operator (binary_operator (identifier) (identifier)) (identifier))
(binary_operator (binary_operator (identifier) (identifier)) (identifier)) (binary_operator (binary_operator (identifier) (identifier)) (identifier))
(binary_operator (binary_operator (identifier) (identifier)) (identifier)) (binary_operator (binary_operator (identifier) (identifier)) (identifier))
(binary_operator (binary_operator (identifier) (identifier)) (identifier))
(binary_operator (binary_operator (identifier) (identifier)) (identifier))) (binary_operator (binary_operator (identifier) (identifier)) (identifier)))
===================================== =====================================
@ -190,6 +185,7 @@ a - b ++ c
a = b <<< c a = b <<< c
a + b * c - d a + b * c - d
a ** b + c ** d
--- ---
@ -220,7 +216,14 @@ a + b * c - d
(binary_operator (binary_operator
(identifier) (identifier)
(identifier))) (identifier)))
(identifier))) (identifier))
(binary_operator
(binary_operator
(identifier)
(identifier))
(binary_operator
(identifier)
(identifier))))
===================================== =====================================
precedence determined by parentheses precedence determined by parentheses
@ -234,8 +237,9 @@ precedence determined by parentheses
(source (source
(binary_operator (binary_operator
(block
(unary_operator (unary_operator
(identifier)) (identifier)))
(identifier)) (identifier))
(binary_operator (binary_operator
(block (block
@ -248,25 +252,413 @@ precedence determined by parentheses
(identifier))))) (identifier)))))
===================================== =====================================
multiline "not in" spacing
===================================== =====================================
a not in b
---
(source
(binary_operator
(identifier)
(identifier)))
=====================================
"not in" boundary
=====================================
fun not inARG
---
(source
(call
(identifier)
(arguments
(unary_operator
(identifier)))))
=====================================
multiline / unary
=====================================
@
arg
+
arg
- -
x arg
!
arg
^
arg
not
arg
~~~
arg
&
arg
---
(source
(unary_operator (identifier))
(unary_operator (identifier))
(unary_operator (identifier))
(unary_operator (identifier))
(unary_operator (identifier))
(unary_operator (identifier))
(unary_operator (identifier))
(unary_operator (identifier)))
=====================================
multiline / binary
=====================================
a
**
b
a
*
b
a
/
b
a
++
b
a
--
b
a
+++
b
a
---
b
a
..
b
a
<>
b
a
^^^
b
a
in
b
a
not in
b
a
|>
b
a
<<<
b
a
>>>
b
a
<<~
b
a
~>>
b
a
<~
b
a
~>
b
a
<~>
b
a
<|>
b
a
<
b
a
>
b
a
<=
b
a
>=
b
a
==
b
a
!=
b
a
=~
b
a
===
b
a
!==
b
a
&&
b
a
&&&
b
a
and
b
a
||
b
a
|||
b
a
or
b
a
=
b
a
|
b
a
::
b
a
when
b
a
<-
b
a
\\
b
---
(source
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier))
(binary_operator (identifier) (identifier)))
=====================================
multiline / unary over binary
=====================================
a
+
b
a
-
b
---
(source
(identifier)
(unary_operator
(identifier))
(identifier)
(unary_operator
(identifier)))
=====================================
multiline / right operands
=====================================
x x
not in not in
[y] [y]
x
not in[y]
:a
++:b
:a++
:b
--- ---
(source (source
(unary_operator
(identifier))
(binary_operator (binary_operator
(identifier) (identifier)
(list (list
(identifier)))
(binary_operator
(identifier)
(list
(identifier)))
(binary_operator
(atom
(atom_literal))
(atom
(atom_literal)))
(binary_operator
(atom
(atom_literal))
(atom
(atom_literal))))
=====================================
multiline / unary over binary (precedence)
=====================================
x
-
y
x
+
y
---
(source
(identifier)
(unary_operator
(identifier))
(identifier)
(unary_operator
(identifier)))
=====================================
plus minus
=====================================
x+y
x + y
x+ y
x +y
x +y +z
---
(source
(binary_operator
(identifier)
(identifier))
(binary_operator
(identifier)
(identifier))
(binary_operator
(identifier)
(identifier))
(call
(identifier)
(arguments
(unary_operator
(identifier)))) (identifier))))
(call
(identifier)
(arguments
(unary_operator
(call
(identifier)
(arguments
(unary_operator
(identifier))))))))
===================================== =====================================
stepped range stepped range

View File

@ -87,7 +87,7 @@ nested interpolation
(sigil_name) (sigil_name)
(string_content) (string_content)
(interpolation (interpolation
(identifier)))) (integer))))
(string_content))) (string_content)))
===================================== =====================================
@ -126,7 +126,7 @@ escape sequence
escaped interpolation escaped interpolation
===================================== =====================================
~s{\#{1}} ~s/\#{1}/
--- ---
@ -229,6 +229,7 @@ modifiers
(sigil_modifiers)) (sigil_modifiers))
(sigil (sigil
(sigil_name) (sigil_name)
(string_content)
(sigil_modifiers))) (sigil_modifiers)))
===================================== =====================================
@ -240,7 +241,7 @@ modifiers
--- ---
(source (source
(sigil
(sigil_name)
(ERROR) (ERROR)
(call (string_content)))
(string
(string_content))))

View File

@ -29,8 +29,7 @@ end
(call (call
(identifier) (identifier)
(arguments (arguments
(call (identifier))
(identifier)))
(do_block))) (do_block)))
===================================== =====================================
@ -48,9 +47,9 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier))))) (identifier))))
(do_block (do_block
(identifier)))) (identifier))))
@ -69,9 +68,9 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier))))) (identifier))))
(do_block (do_block
(identifier)))) (identifier))))
@ -90,10 +89,10 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier) (identifier)
(identifier))))) (identifier))))
(do_block (do_block
(binary_operator (binary_operator
(identifier) (identifier)
@ -114,10 +113,10 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier) (identifier)
(identifier))))) (identifier))))
(do_block (do_block
(binary_operator (binary_operator
(identifier) (identifier)
@ -142,12 +141,12 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier) (identifier)
(binary_operator (binary_operator
(identifier) (identifier)
(integer)))))) (integer)))))
(do_block (do_block
(binary_operator (binary_operator
(identifier) (identifier)
@ -156,12 +155,12 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier) (identifier)
(binary_operator (binary_operator
(identifier) (identifier)
(integer)))))) (integer)))))
(do_block (do_block
(binary_operator (binary_operator
(identifier) (identifier)
@ -181,24 +180,25 @@ def fun(x), do: x
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier)
(arguments))
(keywords
(pair
(keyword
(atom_literal))
(integer)))))
(call
(identifier)
(arguments
(call
(identifier)
(arguments
(identifier))) (identifier)))
(keywords (keywords
(pair (pair
(keyword (keyword
(atom_literal)) (atom_literal))
(integer)))) (identifier))))))
(call
(identifier)
(arguments
(call
(identifier
(arguments
(identifier)))))
(keywords
(pair
(keyword
(atom_literal))
(identifier)))))
===================================== =====================================
def / pattern matching def / pattern matching
@ -244,9 +244,9 @@ end
(arguments (arguments
(binary_operator (binary_operator
(call (call
(identifier (identifier)
(arguments (arguments
(identifier)))) (identifier)))
(binary_operator (binary_operator
(identifier) (identifier)
(integer)))) (integer))))
@ -269,9 +269,9 @@ end
(arguments (arguments
(binary_operator (binary_operator
(call (call
(identifier (identifier)
(arguments (arguments
(identifier)))) (identifier)))
(binary_operator (binary_operator
(binary_operator (binary_operator
(identifier) (identifier)
@ -297,9 +297,9 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier))))) (identifier))))
(do_block (do_block
(identifier)))) (identifier))))
@ -320,9 +320,9 @@ end
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier (identifier)
(arguments (arguments
(identifier))))) (identifier))))
(do_block (do_block
(call (call
(identifier) (identifier)
@ -347,9 +347,9 @@ defguard is_even(term) when is_integer(term) and rem(term, 2) == 0
(arguments (arguments
(binary_operator (binary_operator
(call (call
(identifier (identifier)
(arguments (arguments
(identifier)))) (identifier)))
(binary_operator (binary_operator
(call (call
(identifier) (identifier)
@ -362,3 +362,33 @@ defguard is_even(term) when is_integer(term) and rem(term, 2) == 0
(identifier) (identifier)
(integer))) (integer)))
(integer))))))) (integer)))))))
=====================================
def in macro
=====================================
def unquote(name)(unquote_splicing(args)) do
unquote(compiled)
end
---
(source
(call
(identifier)
(arguments
(call
(call
(identifier)
(arguments
(identifier)))
(arguments
(call
(identifier)
(arguments
(identifier))))))
(do_block
(call
(identifier)
(arguments
(identifier))))))

View File

@ -14,14 +14,14 @@ for n <- [1, 2], do: n * 2
(identifier) (identifier)
(list (list
(integer) (integer)
(integer)))) (integer)))
(keywords (keywords
(pair (pair
(keyword (keyword
(atom_literal)) (atom_literal))
(binary_operator (binary_operator
(identifier) (identifier)
(integer)))))) (integer)))))))
===================================== =====================================
for / enumerable / with options and block for / enumerable / with options and block
@ -42,7 +42,8 @@ end
(call (call
(dot (dot
(alias) (alias)
(identifier)))) (identifier))
(arguments)))
(keywords (keywords
(pair (pair
(keyword (keyword
@ -50,7 +51,8 @@ end
(call (call
(dot (dot
(alias) (alias)
(identifier)))))) (identifier))
(arguments)))))
(do_block (do_block
(call (call
(dot (dot
@ -71,7 +73,7 @@ for <<c <- " hello world ">>, c != ?\s, into: "", do: <<c>>
(call (call
(identifier) (identifier)
(arguments (arguments
(binary (bitstring
(binary_operator (binary_operator
(identifier) (identifier)
(string (string
@ -83,12 +85,11 @@ for <<c <- " hello world ">>, c != ?\s, into: "", do: <<c>>
(pair (pair
(keyword (keyword
(atom_literal)) (atom_literal))
(string (string))
(string_content)))
(pair (pair
(keyword (keyword
(atom_literal)) (atom_literal))
(binary (bitstring
(identifier))))))) (identifier)))))))
===================================== =====================================

View File

@ -38,13 +38,17 @@ with type parentheses
(identifier) (identifier)
(arguments (arguments
(call (call
(identifier)) (identifier)
(arguments))
(call (call
(identifier)) (identifier)
(arguments))
(call (call
(identifier)))) (identifier)
(arguments))))
(call (call
(identifier))))))) (identifier)
(arguments)))))))
===================================== =====================================
with literals with literals
@ -68,8 +72,8 @@ with literals
(keywords (keywords
(pair (pair
(keyword (keyword
(atom_literal) (atom_literal))
(identifier)))))))) (identifier)))))))
(binary_operator (binary_operator
(tuple (tuple
(atom (atom
@ -97,14 +101,16 @@ with function reference
(call (call
(identifier) (identifier)
(arguments (arguments
(block
(stab_clause (stab_clause
(body (body
(identifier))) (identifier))))
(block
(stab_clause (stab_clause
(arguments (arguments
(identifier)) (identifier))
(body (body
(identifier))))) (identifier))))))
(identifier)))))) (identifier))))))
===================================== =====================================
@ -127,11 +133,13 @@ with remote type
(call (call
(dot (dot
(alias) (alias)
(identifier))))) (identifier))
(arguments))))
(call (call
(dot (dot
(alias) (alias)
(identifier)))))))) (identifier))
(arguments)))))))
===================================== =====================================
with type guard with type guard
@ -208,10 +216,11 @@ nonempty list
(identifier) (identifier)
(arguments (arguments
(binary_operator (binary_operator
(identifier)
(list
(call (call
(identifier)) (identifier)
(arguments))
(list
(identifier)
(identifier))))))) (identifier)))))))
===================================== =====================================

View File

@ -8,7 +8,6 @@ AZ_az_19_
--- ---
(source (source
(alias)
(alias) (alias)
(alias)) (alias))
@ -21,6 +20,22 @@ Mod.Child.Child
--- ---
(source
(alias)
(alias))
=====================================
spacing
=====================================
Mod . Child
Mod
.
Child
---
(source (source
(alias) (alias)
(alias)) (alias))
@ -72,17 +87,12 @@ __MODULE__.Child
[error] does not support characters outside ASCII [error] does not support characters outside ASCII
===================================== =====================================
Modこ
Ólá Ólá
Olá Olá
--- ---
(source (source
(alias)
(ERROR (ERROR
(identifier)) (atom_literal)
(ERROR (atom_literal)))
(identifier))
(ERROR
(identifier)))

View File

@ -26,12 +26,12 @@ simple literal
operators operators
===================================== =====================================
[:~~~, :~>>, :~>, :|||, :||, :|>, :|, :>>>, :>=, :>, :=~, :===, :==, :=, :<~>, :<~, :<|>, :<>, :<=, :<<~, :<<<, :<-, :<, :+++, :++, :+, :^^^, :^, :&&&, :&&, :&, :\\, :/, :*, :@, :.., :., :!==, :!=, :!, :::, :->, :---, :--, :-] [:~~~, :~>>, :~>, :|||, :||, :|>, :|, :>>>, :>=, :>, :=~, :===, :==, :=, :<~>, :<~, :<|>, :<>, :<=, :<<~, :<<<, :<-, :<, :+++, :++, :+, :^^^, :^, :&&&, :&&, :&, :\\, :/, :**, :*, :@, :.., :., :!==, :!=, :!, :::, :->, :---, :--, :-]
--- ---
(source (source
(list (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)))) (list (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom 
(atom_literal))))
===================================== =====================================
special operator-like atoms special operator-like atoms

View File

@ -80,33 +80,38 @@ multiple modifiers
(string_content)) (string_content))
(binary_operator (binary_operator
(identifier) (identifier)
(identifier))) (identifier))))
(bitstring
(binary_operator (binary_operator
(string (string
(string_content)) (string_content))
(binary_operator (binary_operator
(identifier) (identifier)
(identifier))) (identifier))))
(bitstring
(binary_operator (binary_operator
(string (string
(string_content)) (string_content))
(binary_operator (binary_operator
(identifier) (identifier)
(identifier))) (identifier))))
(bitstring
(binary_operator (binary_operator
(integer) (integer)
(binary_operator (binary_operator
(binary_operator (binary_operator
(integer) (integer)
(identifier)) (identifier))
(identifier))) (identifier))))
(bitstring
(binary_operator (binary_operator
(integer) (integer)
(binary_operator (binary_operator
(binary_operator (binary_operator
(identifier) (identifier)
(identifier)) (identifier))
(identifier))) (identifier))))
(bitstring
(binary_operator (binary_operator
(float) (float)
(binary_operator (binary_operator
@ -143,6 +148,7 @@ multiple components with modifiers
(binary_operator (binary_operator
(identifier) (identifier)
(call (call
(identifier)
(arguments (arguments
(identifier))))))) (identifier)))))))
@ -169,6 +175,7 @@ spacing
(binary_operator (binary_operator
(identifier) (identifier)
(call (call
(identifier)
(arguments (arguments
(identifier))))))) (identifier)))))))

View File

@ -67,7 +67,7 @@ nested interpolation
(charlist (charlist
(string_content) (string_content)
(interpolation (interpolation
(identifier)))) (integer))))
(string_content))) (string_content)))
===================================== =====================================
@ -167,7 +167,8 @@ this is #{
(charlist (charlist
(string_content) (string_content)
(interpolation (interpolation
(identifier)))) (integer))
(string_content)))
(string_content))) (string_content)))
===================================== =====================================
@ -186,9 +187,13 @@ heredoc / escaped delimiter
(source (source
(charlist (charlist
(escape_sequence)
(string_content) (string_content)
(escape_sequence)
(string_content))
(charlist (charlist
(string_content)
(escape_sequence)
(escape_sequence)
(escape_sequence) (escape_sequence)
(string_content))) (string_content)))
@ -204,5 +209,6 @@ heredoc / escaped interpolation
(source (source
(charlist (charlist
(string_content)
(escape_sequence) (escape_sequence)
(string_content))) (string_content)))

View File

@ -14,7 +14,6 @@ decimal
(unary_operator (unary_operator
(integer)) (integer))
(integer) (integer)
(integer)
(integer)) (integer))
===================================== =====================================

View File

@ -2,13 +2,25 @@
simple literal simple literal
===================================== =====================================
[a_b@12?: 1, A_B@12!: 2] [a: 1, a_b@12?: 2, A_B@12!: 3, Mod: 4, __struct__: 5]
--- ---
(source (source
(list (list
(keywords (keywords
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))
(pair (pair
(keyword (keyword
(atom_literal)) (atom_literal))
@ -83,6 +95,73 @@ operator key
(atom_literal)) (atom_literal))
(integer))))) (integer)))))
=====================================
special atom key
=====================================
[...: 1, %{}: 2, {}: 3, %: 4, <<>>: 5, ..//: 6]
---
(source
(list
(keywords
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer)))))
=====================================
reserved token key
=====================================
[not: 1, and: 2]
[nil: 1, true: 2]
---
(source
(list
(keywords
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer))))
(list
(keywords
(pair
(keyword
(atom_literal))
(integer))
(pair
(keyword
(atom_literal))
(integer)))))
===================================== =====================================
quoted key quoted key
===================================== =====================================
@ -141,18 +220,21 @@ key interpolation
[error] with trailing items [error] with trailing items
===================================== =====================================
[a: 1, b: 2, 1] [a: 1, b: 2, 1 => 1]
--- ---
(source (source
(list (list
(ERROR
(keywords (keywords
(pair (pair
(keyword (keyword
(atom_literal))) (atom_literal))
(integer))
(pair (pair
(keyword (keyword
(atom_literal))))) (atom_literal))
(integer))) (integer)))
(ERROR
(integer)
(integer))))

View File

@ -3,6 +3,8 @@ simple literal
===================================== =====================================
[] []
[a]
[A]
[1] [1]
[1, 2] [1, 2]
[1,2] [1,2]
@ -12,6 +14,10 @@ simple literal
(source (source
(list) (list)
(list
(identifier))
(list
(alias))
(list (list
(integer)) (integer))
(list (list
@ -64,8 +70,8 @@ trailing separator
(source (source
(list (list
(ERROR (ERROR)
(integer)))) (integer)))
===================================== =====================================
[error] missing separator [error] missing separator

View File

@ -126,30 +126,31 @@ update syntax
(binary_operator (binary_operator
(string (string
(string_content)) (string_content))
(integer)))))) (string
(string_content)))))))
===================================== =====================================
[error] ordering [error] ordering
===================================== =====================================
%{b: 2, c: 3, "a" => 1} %{b: 2, c: 3, 1 => 1}
--- ---
(source (source
(map (map
(map_content) (map_content
(ERROR
(keywords (keywords
(pair (pair
(keyword (keyword
(atom_literal))) (atom_literal))
(integer))
(pair (pair
(keyword (keyword
(atom_literal))))) (atom_literal))
(binary_operator (integer))))
(string (ERROR
(string_content)) (integer)
(integer)))) (integer))))
===================================== =====================================
@ -162,40 +163,12 @@ update syntax
(source (source
(map (map
(map_content) (map_content
(ERROR
(binary_operator (binary_operator
(string (string
(string_content)) (string_content))
(integer))) (ERROR (integer))
(binary_operator (binary_operator
(string (string
(string_content)) (string_content))
(integer)))) (integer))))))
=====================================
[error] invalid content
=====================================
%{1}
%{1, 1}
%{a, [], {}}
---
(source
(map
(map_content
(ERROR
(integer))))
(map
(map_content
(ERROR
(integer)
(integer))))
(map
(map_content
(ERROR
(identifier)
(list)
(tuple)))))

View File

@ -67,7 +67,7 @@ nested interpolation
(string (string
(string_content) (string_content)
(interpolation (interpolation
(identifier)))) (integer))))
(string_content))) (string_content)))
===================================== =====================================
@ -167,7 +167,8 @@ this is #{
(string (string
(string_content) (string_content)
(interpolation (interpolation
(identifier)))) (integer))
(string_content)))
(string_content))) (string_content)))
===================================== =====================================
@ -186,9 +187,13 @@ heredoc / escaped delimiter
(source (source
(string (string
(escape_sequence)
(string_content) (string_content)
(escape_sequence)
(string_content))
(string (string
(string_content)
(escape_sequence)
(escape_sequence)
(escape_sequence) (escape_sequence)
(string_content))) (string_content)))
@ -204,6 +209,7 @@ heredoc / escaped interpolation
(source (source
(string (string
(string_content)
(escape_sequence) (escape_sequence)
(string_content))) (string_content)))

View File

@ -139,7 +139,8 @@ update syntax
(binary_operator (binary_operator
(string (string
(string_content)) (string_content))
(integer)))))) (string
(string_content)))))))
===================================== =====================================
unused struct identifier unused struct identifier
@ -199,3 +200,47 @@ with special identifier
(dot (dot
(special_identifier) (special_identifier)
(alias))))) (alias)))))
=====================================
with atom
=====================================
%:"Elixir.Mod"{}
---
(source
(map
(struct
(atom
(string_content)))))
=====================================
with call
=====================================
%fun(){}
%Mod.fun(){}
%fun.(){}
---
(source
(map
(struct
(call
(identifier)
(arguments))))
(map
(struct
(call
(dot
(alias)
(identifier))
(arguments))))
(map
(struct
(call
(dot
(identifier))
(arguments)))))

View File

@ -64,8 +64,8 @@ trailing separator
(source (source
(tuple (tuple
(ERROR (ERROR)
(integer)))) (integer)))
===================================== =====================================
[error] missing separator [error] missing separator

View File

@ -7,7 +7,6 @@ camelCase
az_AZ_19 az_AZ_19
bang! bang!
question? question?
__TEST__
doctest doctest
not1 not1
notfalse notfalse
@ -22,7 +21,6 @@ notfalse
(identifier) (identifier)
(identifier) (identifier)
(identifier) (identifier)
(identifier)
(identifier)) (identifier))
===================================== =====================================
@ -31,10 +29,12 @@ unused
_ _
_number _number
__TEST__
--- ---
(source (source
(unused_identifier)
(unused_identifier) (unused_identifier)
(unused_identifier)) (unused_identifier))