922 lines
28 KiB
JavaScript
922 lines
28 KiB
JavaScript
// Precedence levels used throughout the grammar. A higher number binds
// tighter. The ordering follows the Elixir operator table:
//
//   * https://hexdocs.pm/elixir/master/operators.html
//   * https://github.com/elixir-lang/elixir/blob/master/lib/elixir/src/elixir_parser.yrl
//
// ACCESS and CAPTURE_OPERAND are grammar-specific levels slotted in between
// the operator levels.
const PREC = {
  // Loosest binding: match-like and guard operators
  IN_MATCH_OPS: 10,
  WHEN_OP: 20,
  TYPE_OP: 30,
  BAR_OP: 40,
  ASSOC_OP: 50,
  CAPTURE_OP: 60,
  MATCH_OP: 70,

  // Boolean, comparison and relational operators
  OR_OPS: 80,
  AND_OPS: 90,
  COMP_OPS: 100,
  REL_OPS: 110,

  // Arrow-like, membership and range/concat operators
  ARROW_OPS: 120,
  IN_OPS: 130,
  XOR_OP: 140,
  TERNARY_OP: 150,
  CONCAT_OPS: 160,

  // Arithmetic operators
  ADD_OPS: 170,
  MULT_OPS: 180,
  POWER_OP: 190,

  // Tightest binding: unary operators, access, dot and friends
  UNARY_OPS: 200,
  ACCESS: 205,
  DOT_OP: 210,
  AT_OP: 220,
  CAPTURE_OPERAND: 235,
};
|
|
|
|
// Operator groups. Operators within one group share a precedence level
// (see PREC) and are spread into `choice(...)` by the grammar rules.
const IN_MATCH_OPS = ["<-", "\\\\"];
const OR_OPS = ["||", "|||", "or"];
const AND_OPS = ["&&", "&&&", "and"];
const COMP_OPS = ["==", "!=", "=~", "===", "!=="];
const REL_OPS = ["<", ">", "<=", ">="];
const ARROW_OPS = ["|>", "<<<", ">>>", "<<~", "~>>", "<~", "~>", "<~>", "<|>"];
const IN_OPS = ["in", "not in"];
const CONCAT_OPS = ["++", "--", "+++", "---", "..", "<>"];
const ADD_OPS = ["+", "-"];
const MULT_OPS = ["*", "/"];
const UNARY_OPS = ["+", "-", "!", "^", "~~~", "not"];

// Every operator literal known to the grammar, each listed exactly once.
// "+" and "-" appear in both ADD_OPS and UNARY_OPS, so the flattened list is
// de-duplicated (a Set keeps first-seen order) to avoid feeding duplicate
// alternatives into `choice(...)` further down.
const ALL_OPS = [
  ...new Set(
    [
      ["->", "when", "::", "|", "=>", "&", "=", "^^^", "//", "**", ".", "@"],
      IN_MATCH_OPS,
      OR_OPS,
      AND_OPS,
      COMP_OPS,
      REL_OPS,
      ARROW_OPS,
      IN_OPS,
      CONCAT_OPS,
      ADD_OPS,
      MULT_OPS,
      UNARY_OPS,
    ].flat()
  ),
];

// Operators that may follow ":" to form an atom. Word operators (anything
// containing a lowercase letter, e.g. "and", "not in") are ignored here, and
// so is "=>", which is not a valid atom.
const ATOM_OPERATOR_LITERALS = ALL_OPS.filter(
  (operator) => !/[a-z]/.test(operator) && operator !== "=>"
);
|
|
|
|
// Special (non-word, non-operator) literals accepted as atoms.
// Keywords recognise the same literals through the external scanner
// (KEYWORD_SPECIAL_LITERAL), so the two lists must be kept in sync.
const ATOM_SPECIAL_LITERALS = ["...", "%{}", "{}", "%", "<<>>", "..//"];

// Word tokens that the grammar refers to directly as string literals.
const RESERVED_WORD_TOKENS = [
  // Operators
  "and",
  "in",
  "not",
  "or",
  "when",
  // Literals
  "true",
  "false",
  "nil",
  // Other
  "after",
  "catch",
  "do",
  "else",
  "end",
  "fn",
  "rescue",
];

// Identifiers that get their own `special_identifier` node.
const SPECIAL_IDENTIFIERS = [
  "__MODULE__",
  "__DIR__",
  "__ENV__",
  "__CALLER__",
  "__STACKTRACE__",
];
|
|
|
|
// Numbers
//
// Building blocks for the `integer` and `float` tokens. Digit groups may be
// separated by a single "_" (e.g. 1_000_000).

const DIGITS = /[0-9]+/;
const BIN_DIGITS = /[0-1]+/;
const OCT_DIGITS = /[0-7]+/;
const HEX_DIGITS = /[0-9a-fA-F]+/;

const numberDec = sep1(DIGITS, "_");
const numberBin = seq("0b", sep1(BIN_DIGITS, "_"));
const numberOct = seq("0o", sep1(OCT_DIGITS, "_"));
const numberHex = seq("0x", sep1(HEX_DIGITS, "_"));

const integer = choice(numberDec, numberBin, numberOct, numberHex);

// The exponent is decimal only: Elixir does not accept a binary, octal or
// hexadecimal exponent, so `1.0e0x10` must not lex as a single float.
const floatScientificPart = seq(
  /[eE]/,
  optional(choice("-", "+")),
  numberDec
);
const float = seq(numberDec, ".", numberDec, optional(floatScientificPart));

// A single segment of an alias (module name), e.g. `Foo` in `Foo.Bar`.
const aliasPart = /[A-Z][_a-zA-Z0-9]*/;
|
|
|
|
module.exports = grammar({
|
|
name: "elixir",
|
|
|
|
// TODO describe stuff (also in the separate notes doc add clarification
|
|
// how we use this verbose tokens to avoid needing scanner state)
|
|
externals: ($) => [
|
|
$._quoted_content_i_single,
|
|
$._quoted_content_i_double,
|
|
$._quoted_content_i_heredoc_single,
|
|
$._quoted_content_i_heredoc_double,
|
|
$._quoted_content_i_parenthesis,
|
|
$._quoted_content_i_curly,
|
|
$._quoted_content_i_square,
|
|
$._quoted_content_i_angle,
|
|
$._quoted_content_i_bar,
|
|
$._quoted_content_i_slash,
|
|
|
|
$._quoted_content_single,
|
|
$._quoted_content_double,
|
|
$._quoted_content_heredoc_single,
|
|
$._quoted_content_heredoc_double,
|
|
$._quoted_content_parenthesis,
|
|
$._quoted_content_curly,
|
|
$._quoted_content_square,
|
|
$._quoted_content_angle,
|
|
$._quoted_content_bar,
|
|
$._quoted_content_slash,
|
|
|
|
$._keyword_special_literal,
|
|
$._atom_start,
|
|
$._keyword_end,
|
|
|
|
$._newline_before_do,
|
|
$._newline_before_binary_op,
|
|
// TODO explain this, basically we use newline ignored for newline before comment,
|
|
// as after the comment there is another newline that we then consider as usual (so
|
|
// that comments are skipped when considering newlines) <- this is chaotic need a better one
|
|
$._newline_before_comment,
|
|
|
|
// TODO explain this, basically we use this to force unary + and -
|
|
// if there is no spacing before the operand
|
|
$._before_unary_op,
|
|
|
|
$._not_in,
|
|
],
|
|
|
|
// TODO include in notes about why using extra for newline before binary op is fine
|
|
// TODO figure out how "\n" helps with the behaviour in
|
|
// [
|
|
// :a,
|
|
// ]
|
|
// and how it generally works with extras
|
|
extras: ($) => [
|
|
$.comment,
|
|
/\s|\\\n/,
|
|
$._newline_before_binary_op,
|
|
$._newline_before_comment,
|
|
"\n",
|
|
],
|
|
|
|
// TODO check if the parser doesn't compile without each conflict rule,
|
|
// otherwise it means we don't really use it (I think)
|
|
conflicts: ($) => [
|
|
// [$._newline_before_binary_op],
|
|
[$.binary_operator],
|
|
[$.keywords],
|
|
// [$.identifier, $.atom_literal],
|
|
[$._expression, $._local_call_with_arguments],
|
|
[
|
|
$._expression,
|
|
$._local_call_with_arguments,
|
|
$._local_call_without_arguments,
|
|
],
|
|
|
|
[$._remote_call, $._parenthesised_remote_call],
|
|
|
|
// stab clause `(x` may be either `(x;y) ->` or `(x, y) ->`
|
|
// [$.block, $._stab_clause_arguments],
|
|
[$.block, $._stab_clause_parentheses_arguments],
|
|
[$.block, $._stab_clause_arguments],
|
|
|
|
[$.block, $._stab_clause_arguments_expression],
|
|
|
|
// when in stab clause
|
|
[$.binary_operator, $._stab_clause_arguments_expression],
|
|
|
|
[$.tuple, $.map],
|
|
[$.tuple, $.map_content],
|
|
[$.operator_identifier, $.stab_clause],
|
|
[$.unary_operator, $.operator_identifier],
|
|
// [$.alias],
|
|
[$.body],
|
|
// [$.block, $._stab_clause_arguments],
|
|
// [$.block, $._stab_clause_parentheses_arguments],
|
|
// [$.block, $._stab_clause_parentheses_arguments],
|
|
[$.after_block],
|
|
[$.rescue_block],
|
|
[$.catch_block],
|
|
[$.else_block],
|
|
],
|
|
|
|
rules: {
|
|
source: ($) =>
|
|
seq(
|
|
optional($._terminator),
|
|
optional(
|
|
seq(sep1($._expression, $._terminator), optional($._terminator))
|
|
)
|
|
),
|
|
|
|
_terminator: ($) =>
|
|
prec.right(choice(seq(repeat("\n"), ";"), repeat1("\n"))),
|
|
|
|
_expression: ($) =>
|
|
choice(
|
|
$.block,
|
|
$._identifier,
|
|
$.alias,
|
|
$.integer,
|
|
$.float,
|
|
$.atom,
|
|
$.string,
|
|
$.charlist,
|
|
$.sigil,
|
|
$.list,
|
|
$.tuple,
|
|
$.bitstring,
|
|
$.map,
|
|
$.char,
|
|
$.boolean,
|
|
$.nil,
|
|
$.unary_operator,
|
|
$.binary_operator,
|
|
$.dot,
|
|
$.call,
|
|
$.access_call,
|
|
$.anonymous_function
|
|
),
|
|
|
|
block: ($) =>
|
|
prec(
|
|
PREC.WHEN_OP,
|
|
seq(
|
|
"(",
|
|
seq(
|
|
optional($._terminator),
|
|
optional(
|
|
seq(
|
|
sep1(choice($._expression, $.stab_clause), $._terminator),
|
|
optional($._terminator)
|
|
)
|
|
)
|
|
),
|
|
")"
|
|
)
|
|
),
|
|
|
|
_identifier: ($) =>
|
|
choice($.identifier, $.unused_identifier, $.special_identifier),
|
|
|
|
// Note: Elixir does not allow uppercase and titlecase letters
|
|
// as a variable starting character, but this regex would match
|
|
// those. This implies we would happily parse those cases, but
|
|
// since they are not valid Elixir it's unlikely to stumble upon
|
|
// them. TODO reword
|
|
// Ref: https://hexdocs.pm/elixir/master/unicode-syntax.html#variables
|
|
// TODO see if we need this in custom scanner in the end, if we do,
|
|
// then we may use the generation script from the original repo instead
|
|
// and make this an external (though I'd check if these custom unicode
|
|
// functions are efficient, does compiler optimise such checks?)
|
|
// identifier: ($) => choice(/[\p{ID_Start}][\p{ID_Continue}]*[?!]?/u, "..."),
|
|
// identifier: ($) => choice(/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}\p{Other_ID_Start}][\p{ID_Continue}]*[?!]?/u, "..."),
|
|
// identifier: ($) => choice(/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}][\p{ID_Continue}]*[?!]?/u, "..."),
|
|
//
|
|
// TODO elaborate, but basically
|
|
//
|
|
// we remove uppercase/titlecase letters from ID_Start as elixir does
|
|
// we remove the subtractions (we cannot express group subtraction in regex),
|
|
// but it's fine becaues at the time of writing these groups only really subtract
|
|
// a single character
|
|
// Unicode.Set.to_utf8_char "[[[:L:][:Nl:][:Other_ID_Start:]] & [[:Pattern_Syntax:][:Pattern_White_Space:]]]"
|
|
// we use hardcoded codepoints for \p{Other_ID_Start} since treesitter/js regexp doesn't
|
|
// recognise this group
|
|
//
|
|
// Other_ID_Start \u1885\u1886\u2118\u212E\u309B\u309C
|
|
// (this the list at the time of writing, it's for backward compatibility, see https://unicode.org/reports/tr31/#Backward_Compatibility)
|
|
identifier: ($) =>
|
|
choice(
|
|
/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}\u1885\u1886\u2118\u212E\u309B\u309C][\p{ID_Continue}]*[?!]?/u,
|
|
"..."
|
|
),
|
|
|
|
unused_identifier: ($) => /_[\p{ID_Continue}]*[?!]?/u,
|
|
|
|
special_identifier: ($) => choice(...SPECIAL_IDENTIFIERS),
|
|
|
|
// We have a separate rule for single-part alias, so that we
|
|
// can use it in the keywords rule
|
|
alias: ($) => choice($._alias_single, $._alias_multi),
|
|
|
|
_alias_single: ($) => aliasPart,
|
|
|
|
_alias_multi: ($) => token(sep1(aliasPart, /\s*\.\s*/)),
|
|
|
|
integer: ($) => token(integer),
|
|
|
|
float: ($) => token(float),
|
|
|
|
atom: ($) =>
|
|
seq(
|
|
$._atom_start,
|
|
choice(
|
|
alias($._atom_word_literal, $.atom_literal),
|
|
alias($._atom_operator_literal, $.atom_literal),
|
|
alias($._atom_special_literal, $.atom_literal),
|
|
$._quoted_i_double,
|
|
$._quoted_i_single
|
|
)
|
|
),
|
|
|
|
// TODO comment on the unicode groups here
|
|
_atom_word_literal: ($) => /[\p{ID_Start}_][\p{ID_Continue}@]*[?!]?/u,
|
|
|
|
_atom_operator_literal: ($) => choice(...ATOM_OPERATOR_LITERALS),
|
|
|
|
_atom_special_literal: ($) => choice(...ATOM_SPECIAL_LITERALS),
|
|
|
|
// Defines $._quoted_content_i_{name} and $._quoted_content_{name} rules,
|
|
// content with and without interpolation respectively
|
|
...defineQuoted(`"`, `"`, "double"),
|
|
...defineQuoted(`'`, `'`, "single"),
|
|
...defineQuoted(`'''`, `'''`, "heredoc_single"),
|
|
...defineQuoted(`"""`, `"""`, "heredoc_double"),
|
|
...defineQuoted(`(`, `)`, "parenthesis"),
|
|
...defineQuoted(`{`, `}`, "curly"),
|
|
...defineQuoted(`[`, `]`, "square"),
|
|
...defineQuoted(`<`, `>`, "angle"),
|
|
...defineQuoted(`|`, `|`, "bar"),
|
|
...defineQuoted(`/`, `/`, "slash"),
|
|
|
|
string: ($) => choice($._quoted_i_double, $._quoted_i_heredoc_double),
|
|
|
|
charlist: ($) => choice($._quoted_i_single, $._quoted_i_heredoc_single),
|
|
|
|
interpolation: ($) => seq("#{", $._expression, "}"),
|
|
|
|
escape_sequence: ($) =>
|
|
token(
|
|
seq(
|
|
"\\",
|
|
choice(
|
|
// Single escaped character
|
|
/[^ux]/,
|
|
// Hex byte
|
|
/x[0-9a-fA-F]{1,2}/,
|
|
/x{[0-9a-fA-F]+}/,
|
|
// Unicode code point
|
|
/u{[0-9a-fA-F]+}/,
|
|
/u[0-9a-fA-F]{4}/
|
|
)
|
|
)
|
|
),
|
|
|
|
sigil: ($) =>
|
|
seq(
|
|
"~",
|
|
choice(
|
|
seq(
|
|
alias(token.immediate(/[a-z]/), $.sigil_name),
|
|
choice(
|
|
$._quoted_i_double,
|
|
$._quoted_i_single,
|
|
$._quoted_i_heredoc_single,
|
|
$._quoted_i_heredoc_double,
|
|
$._quoted_i_parenthesis,
|
|
$._quoted_i_curly,
|
|
$._quoted_i_square,
|
|
$._quoted_i_angle,
|
|
$._quoted_i_bar,
|
|
$._quoted_i_slash
|
|
)
|
|
),
|
|
seq(
|
|
alias(token.immediate(/[A-Z]/), $.sigil_name),
|
|
choice(
|
|
$._quoted_double,
|
|
$._quoted_single,
|
|
$._quoted_heredoc_single,
|
|
$._quoted_heredoc_double,
|
|
$._quoted_parenthesis,
|
|
$._quoted_curly,
|
|
$._quoted_square,
|
|
$._quoted_angle,
|
|
$._quoted_bar,
|
|
$._quoted_slash
|
|
)
|
|
)
|
|
),
|
|
optional(alias(token.immediate(/[a-zA-Z]+/), $.sigil_modifiers))
|
|
),
|
|
|
|
unary_operator: ($) =>
|
|
choice(
|
|
unaryOp($, prec, PREC.CAPTURE_OP, "&", $._capture_expression),
|
|
unaryOp($, prec, PREC.UNARY_OPS, choice(...UNARY_OPS)),
|
|
unaryOp($, prec, PREC.AT_OP, "@"),
|
|
// Capture operand like &1 is a special case with higher precedence
|
|
unaryOp($, prec, PREC.CAPTURE_OPERAND, "&", $.integer)
|
|
),
|
|
|
|
_capture_expression: ($) =>
|
|
choice(
|
|
// TODO sholud parenthesised expression be generally used (?)
|
|
// Precedence over block expression
|
|
prec(PREC.WHEN_OP + 1, seq("(", $._expression, ")")),
|
|
$._expression
|
|
),
|
|
|
|
binary_operator: ($) =>
|
|
choice(
|
|
binaryOp($, prec.left, PREC.IN_MATCH_OPS, choice(...IN_MATCH_OPS)),
|
|
binaryOp(
|
|
$,
|
|
prec.right,
|
|
PREC.WHEN_OP,
|
|
"when",
|
|
$._expression,
|
|
choice($._expression, $.keywords)
|
|
),
|
|
binaryOp($, prec.right, PREC.TYPE_OP, "::"),
|
|
binaryOp(
|
|
$,
|
|
prec.right,
|
|
PREC.BAR_OP,
|
|
"|",
|
|
$._expression,
|
|
choice($._expression, $.keywords)
|
|
),
|
|
binaryOp($, prec.right, PREC.ASSOC_OP, "=>"),
|
|
binaryOp($, prec.right, PREC.MATCH_OP, "="),
|
|
binaryOp($, prec.left, PREC.OR_OPS, choice(...OR_OPS)),
|
|
binaryOp($, prec.left, PREC.AND_OPS, choice(...AND_OPS)),
|
|
binaryOp($, prec.left, PREC.COMP_OPS, choice(...COMP_OPS)),
|
|
binaryOp($, prec.left, PREC.REL_OPS, choice(...REL_OPS)),
|
|
binaryOp($, prec.left, PREC.ARROW_OPS, choice(...ARROW_OPS)),
|
|
binaryOp($, prec.left, PREC.IN_OPS, choice("in", $._not_in)),
|
|
binaryOp($, prec.left, PREC.XOR_OP, "^^^"),
|
|
binaryOp($, prec.right, PREC.TERNARY_OP, "//"),
|
|
binaryOp($, prec.right, PREC.CONCAT_OPS, choice(...CONCAT_OPS)),
|
|
binaryOp($, prec.left, PREC.ADD_OPS, choice(...ADD_OPS)),
|
|
binaryOp($, prec.left, PREC.MULT_OPS, choice(...MULT_OPS)),
|
|
binaryOp($, prec.left, PREC.POWER_OP, "**"),
|
|
// Operator with arity
|
|
binaryOp(
|
|
$,
|
|
prec.left,
|
|
PREC.MULT_OPS,
|
|
"/",
|
|
$.operator_identifier,
|
|
$.integer
|
|
)
|
|
),
|
|
|
|
operator_identifier: ($) =>
|
|
// Operators with the following changes:
|
|
// * exclude "=>" since it's not a valid atom/operator identifier anyway (valid only in map)
|
|
// * we exclude // since it's only valid after ..
|
|
// * we remove "-" and "+" since they are both unary and binary
|
|
|
|
// We use the same precedence as unary operators, so that a sequence
|
|
// like `& /` is a conflict and is resolved via $.conflicts
|
|
// (could be be either `& / 2` or `& / / 2`)
|
|
choice(
|
|
// Unary operators
|
|
prec(PREC.CAPTURE_OP, "&"),
|
|
prec(PREC.UNARY_OPS, choice(...UNARY_OPS)),
|
|
prec(PREC.AT_OP, "@"),
|
|
// Binary operators
|
|
...IN_MATCH_OPS,
|
|
"when",
|
|
"::",
|
|
"|",
|
|
"=",
|
|
...OR_OPS,
|
|
...AND_OPS,
|
|
...COMP_OPS,
|
|
...REL_OPS,
|
|
...ARROW_OPS,
|
|
"in",
|
|
$._not_in,
|
|
"^^",
|
|
...CONCAT_OPS,
|
|
...MULT_OPS,
|
|
"**",
|
|
"->",
|
|
"."
|
|
),
|
|
|
|
dot: ($) =>
|
|
prec(
|
|
PREC.DOT_OP,
|
|
seq(choice($._expression), ".", choice($.alias, $.tuple))
|
|
),
|
|
|
|
keywords: ($) => sep1($.pair, ","),
|
|
|
|
pair: ($) => seq($.keyword, $._expression),
|
|
|
|
keyword: ($) =>
|
|
seq(
|
|
// Tree-sitter doesn't consider ambiguities within individual
|
|
// tokens (in this case regexps). So both in [a] and [a: 1] it
|
|
// would always parse "a" as the same node (based on whether
|
|
// $.identifier or $.atom_literal) is listed first in the rules.
|
|
// However, since identifiers and alias parts are valid atom
|
|
// literals, we can list them here, in which case the parser will
|
|
// consider all paths and pick the valid one.
|
|
// Also see https://github.com/tree-sitter/tree-sitter/issues/518
|
|
choice(
|
|
alias($._atom_word_literal, $.atom_literal),
|
|
alias($._atom_operator_literal, $.atom_literal),
|
|
alias($._keyword_special_literal, $.atom_literal),
|
|
alias($.identifier, $.atom_literal),
|
|
alias($.unused_identifier, $.atom_literal),
|
|
alias($.special_identifier, $.atom_literal),
|
|
alias($._alias_single, $.atom_literal),
|
|
alias(choice(...RESERVED_WORD_TOKENS), $.atom_literal),
|
|
$._quoted_i_double,
|
|
$._quoted_i_single
|
|
),
|
|
$._keyword_end
|
|
),
|
|
|
|
list: ($) => seq("[", optional($._items_with_trailing_separator), "]"),
|
|
|
|
tuple: ($) => seq("{", optional($._items_with_trailing_separator), "}"),
|
|
|
|
bitstring: ($) =>
|
|
seq("<<", optional($._items_with_trailing_separator), ">>"),
|
|
|
|
map: ($) => seq("%", optional($.struct), "{", optional($.map_content), "}"),
|
|
|
|
struct: ($) =>
|
|
prec.left(
|
|
choice(
|
|
$.alias,
|
|
$.atom,
|
|
$._identifier,
|
|
$.unary_operator,
|
|
$.dot,
|
|
alias($._parenthesised_call, $.call)
|
|
)
|
|
),
|
|
|
|
map_content: ($) => $._items_with_trailing_separator,
|
|
|
|
_items_with_trailing_separator: ($) =>
|
|
seq(
|
|
choice(
|
|
seq(sep1($._expression, ","), optional(seq(",", $.keywords))),
|
|
$.keywords
|
|
),
|
|
optional(",")
|
|
),
|
|
|
|
char: ($) => /\?(.|\\.)/,
|
|
|
|
boolean: ($) => choice("true", "false"),
|
|
|
|
nil: ($) => "nil",
|
|
|
|
call: ($) =>
|
|
choice(
|
|
$._local_call_with_arguments,
|
|
$._parenthesised_local_call_with_arguments,
|
|
$._local_call_without_arguments,
|
|
$._remote_call,
|
|
$._parenthesised_remote_call,
|
|
$._anonymous_call,
|
|
$._call_on_call
|
|
),
|
|
|
|
_parenthesised_call: ($) =>
|
|
choice(
|
|
$._parenthesised_local_call_with_arguments,
|
|
$._parenthesised_remote_call,
|
|
$._anonymous_call,
|
|
$._call_on_call
|
|
),
|
|
|
|
_call_on_call: ($) =>
|
|
prec.left(
|
|
seq(
|
|
alias(
|
|
choice(
|
|
$._parenthesised_local_call_with_arguments,
|
|
$._parenthesised_remote_call,
|
|
$._anonymous_call
|
|
),
|
|
$.call
|
|
),
|
|
// arguments in parentheses
|
|
// alias($._local_or_remote_arguments, $.arguments),
|
|
// TODO just make nonimmediate/immediate in the name
|
|
alias($._anonymous_arguments, $.arguments),
|
|
optional(seq(optional($._newline_before_do), $.do_block))
|
|
)
|
|
),
|
|
|
|
_local_call_with_arguments: ($) =>
|
|
// Given `x + y` it can be interpreted either as a binary operator
|
|
// or a call with unary operator. This is an actual ambiguity, so
|
|
// we use dynamic precedence to penalize call
|
|
// prec.dynamic(
|
|
// TODO ideally we would penalize whitespace after unary op,
|
|
// so that x + y is binary op and x +y is unary op, to reflect
|
|
// Elixir ast
|
|
// -1,
|
|
prec.left(
|
|
seq(
|
|
$._identifier,
|
|
alias($._call_arguments, $.arguments),
|
|
// TODO include this in notes:
|
|
// We use external scanner for _newline_before_do because
|
|
// this way we can lookahead through any whitespace
|
|
// (especially newlines). We cannot simply use repeat("\n")
|
|
// and conflict with expression end, because this function
|
|
// rule has left precedence (so that do-end sticks to the outermost
|
|
// call), and thus expression end would always be preferred
|
|
optional(seq(optional($._newline_before_do), $.do_block))
|
|
// optional($.do_block)
|
|
)
|
|
// )
|
|
),
|
|
|
|
_parenthesised_local_call_with_arguments: ($) =>
|
|
// Given `x + y` it can be interpreted either as a binary operator
|
|
// or a call with unary operator. This is an actual ambiguity, so
|
|
// we use dynamic precedence to penalize call
|
|
// prec.dynamic(
|
|
// TODO ideally we would penalize whitespace after unary op,
|
|
// so that x + y is binary op and x +y is unary op, to reflect
|
|
// Elixir ast
|
|
// -1,
|
|
prec.left(
|
|
seq(
|
|
$._identifier,
|
|
alias($._parenthesised_call_arguments, $.arguments),
|
|
// TODO include this in notes:
|
|
// We use external scanner for _newline_before_do because
|
|
// this way we can lookahead through any whitespace
|
|
// (especially newlines). We cannot simply use repeat("\n")
|
|
// and conflict with expression end, because this function
|
|
// rule has left precedence (so that do-end sticks to the outermost
|
|
// call), and thus expression end would always be preferred
|
|
optional(seq(optional($._newline_before_do), $.do_block))
|
|
// optional($.do_block)
|
|
)
|
|
// )
|
|
),
|
|
|
|
_local_call_without_arguments: ($) =>
|
|
// We use lower precedence, so given `fun arg do end`
|
|
// we don't tokenize `arg` as a call
|
|
|
|
// we actually need a conflict because of `foo bar do end` vs `foo bar do: 1`
|
|
// prec(-1,
|
|
prec.dynamic(-1, seq($._identifier, $.do_block)),
|
|
// )
|
|
_remote_call: ($) =>
|
|
prec.left(
|
|
seq(
|
|
alias($._remote_dot, $.dot),
|
|
optional(alias($._call_arguments, $.arguments)),
|
|
optional(seq(optional($._newline_before_do), $.do_block))
|
|
// optional($.do_block)
|
|
)
|
|
),
|
|
|
|
_parenthesised_remote_call: ($) =>
|
|
prec.left(
|
|
seq(
|
|
alias($._remote_dot, $.dot),
|
|
alias($._parenthesised_call_arguments, $.arguments),
|
|
optional(seq(optional($._newline_before_do), $.do_block))
|
|
// optional($.do_block)
|
|
)
|
|
),
|
|
|
|
_remote_dot: ($) =>
|
|
prec(
|
|
PREC.DOT_OP,
|
|
seq(
|
|
$._expression,
|
|
".",
|
|
// TODO can also be string, anything else?
|
|
// compare with the other parser
|
|
// TODO we don't want to support heredoc though
|
|
choice(
|
|
$._identifier,
|
|
alias(choice(...RESERVED_WORD_TOKENS), $.identifier),
|
|
$.operator_identifier,
|
|
alias($._quoted_i_double, $.string),
|
|
alias($._quoted_i_single, $.charlist)
|
|
)
|
|
)
|
|
),
|
|
|
|
_parenthesised_call_arguments: ($) =>
|
|
seq(token.immediate("("), optional($._call_arguments), ")"),
|
|
|
|
_anonymous_call: ($) =>
|
|
seq(
|
|
alias($._anonymous_dot, $.dot),
|
|
alias($._anonymous_arguments, $.arguments)
|
|
),
|
|
|
|
_anonymous_dot: ($) => prec(PREC.DOT_OP, seq($._expression, ".")),
|
|
|
|
_anonymous_arguments: ($) => seq("(", optional($._call_arguments), ")"),
|
|
|
|
_call_arguments: ($) =>
|
|
// Right precedence ensures that `fun1 fun2 x, y` is treated
|
|
// as `fun1(fun2(x, y))` and not `fun1(fun2(x), y)
|
|
prec.right(
|
|
seq(
|
|
choice(
|
|
seq(
|
|
sep1($._expression, ","),
|
|
optional(seq(",", $.keywords, optional(",")))
|
|
),
|
|
seq($.keywords, optional(","))
|
|
)
|
|
)
|
|
),
|
|
|
|
access_call: ($) =>
|
|
prec(
|
|
PREC.ACCESS,
|
|
seq($._expression, token.immediate("["), $._expression, "]")
|
|
),
|
|
|
|
do_block: ($) =>
|
|
seq(
|
|
sugarBlock($, "do"),
|
|
repeat(
|
|
choice($.after_block, $.rescue_block, $.catch_block, $.else_block)
|
|
),
|
|
"end"
|
|
),
|
|
|
|
after_block: ($) => sugarBlock($, "after"),
|
|
|
|
rescue_block: ($) => sugarBlock($, "rescue"),
|
|
|
|
catch_block: ($) => sugarBlock($, "catch"),
|
|
|
|
else_block: ($) => sugarBlock($, "else"),
|
|
|
|
// Specify right precedence, so that we consume as much as we can
|
|
stab_clause: ($) =>
|
|
prec.right(seq(optional($._stab_clause_left), "->", optional($.body))),
|
|
|
|
_stab_clause_left: ($) =>
|
|
choice(
|
|
// Note the first option has higher precedence, TODO clarify
|
|
alias($._stab_clause_parentheses_arguments, $.arguments),
|
|
// TODO naming/cleanup
|
|
alias(
|
|
$._stab_clause_parentheses_arguments_with_guard,
|
|
$.binary_operator
|
|
),
|
|
alias($._stab_clause_arguments, $.arguments),
|
|
alias($._stab_clause_arguments_with_guard, $.binary_operator)
|
|
),
|
|
|
|
_stab_clause_parentheses_arguments: ($) =>
|
|
// `(1) ->` may be interpreted either as block argument
|
|
// or argument in parentheses and we use dynamic precedence
|
|
// to favour the latter
|
|
prec(
|
|
PREC.WHEN_OP,
|
|
prec.dynamic(1, seq("(", optional($._stab_clause_arguments), ")"))
|
|
),
|
|
_stab_clause_parentheses_arguments_with_guard: ($) =>
|
|
seq(
|
|
alias($._stab_clause_parentheses_arguments, $.arguments),
|
|
"when",
|
|
$._expression
|
|
),
|
|
|
|
_stab_clause_arguments_with_guard: ($) =>
|
|
// `a when b ->` may be interpted either such that `a when b` is an argument
|
|
// or a guard binary operator with argument `a` and right operand `b`,
|
|
// we use dynamic precedence to favour the latter
|
|
prec.dynamic(
|
|
1,
|
|
seq(alias($._stab_clause_arguments, $.arguments), "when", $._expression)
|
|
),
|
|
_stab_clause_arguments: ($) =>
|
|
// TODO this is a variant of _items_with_trailing_separator, cleanup
|
|
choice(
|
|
seq(
|
|
sep1($._stab_clause_arguments_expression, ","),
|
|
optional(seq(",", $.keywords))
|
|
),
|
|
$.keywords
|
|
),
|
|
|
|
_stab_clause_arguments_expression: ($) =>
|
|
// Note here we use the same precedence as when operator,
|
|
// so we get a conflict and resolve it dynamically
|
|
prec(PREC.WHEN_OP, $._expression),
|
|
body: ($) =>
|
|
seq(
|
|
choice(
|
|
seq($._terminator, sep($._expression, $._terminator)),
|
|
sep1($._expression, $._terminator)
|
|
),
|
|
optional($._terminator)
|
|
),
|
|
|
|
anonymous_function: ($) =>
|
|
seq(
|
|
"fn",
|
|
optional($._terminator),
|
|
sep1($.stab_clause, $._terminator),
|
|
"end"
|
|
),
|
|
|
|
// A comment may be anywhere, we give it a lower precedence,
|
|
// so it doesn't intercept sequences such as interpolation
|
|
comment: ($) => token(prec(-1, seq("#", /.*/))),
|
|
},
|
|
});
|
|
|
|
/**
 * Builds a rule matching one or more occurrences of `rule`, with
 * `separator` between consecutive occurrences.
 *
 * @param rule - the repeated rule
 * @param separator - the rule placed between occurrences
 * @returns the combined sequence rule
 */
function sep1(rule, separator) {
  const separatedTail = repeat(seq(separator, rule));
  return seq(rule, separatedTail);
}
|
|
|
|
/**
 * Builds a rule matching zero or more occurrences of `rule`, with
 * `separator` between consecutive occurrences.
 *
 * @param rule - the repeated rule
 * @param separator - the rule placed between occurrences
 * @returns the optional, separated sequence rule
 */
function sep(rule, separator) {
  const oneOrMore = sep1(rule, separator);
  return optional(oneOrMore);
}
|
|
|
|
/**
 * Builds a unary operator rule: an optional `_before_unary_op` token, the
 * operator (exposed as the "operator" field) and the operand.
 *
 * @param $ - the grammar rule references
 * @param assoc - precedence wrapper, called as `assoc(precedence, rule)`
 * @param precedence - static precedence passed to `assoc`
 * @param operator - the operator rule
 * @param right - the operand rule; any falsy value selects `$._expression`
 * @returns the wrapped rule
 */
function unaryOp($, assoc, precedence, operator, right = null) {
  const operand = right ? right : $._expression;

  const operation = seq(
    optional($._before_unary_op),
    field("operator", operator),
    operand
  );

  // TODO clarify: the lower dynamic precedence means that `x + y`, which
  // can be read as either `x + y` or `x(+y)`, is parsed as the former.
  // The only exception is when _before_unary_op matches, which forces
  // the latter reading in a case like `x +y`.
  return assoc(precedence, prec.dynamic(-1, operation));
}
|
|
|
|
/**
 * Builds a binary operator rule with "left", "operator" and "right" fields.
 *
 * @param $ - the grammar rule references
 * @param assoc - precedence wrapper, called as `assoc(precedence, rule)`
 * @param precedence - precedence passed to `assoc`
 * @param operator - the operator rule
 * @param left - left operand rule; any falsy value selects `$._expression`
 * @param right - right operand rule; any falsy value selects `$._expression`
 * @returns the wrapped rule
 */
function binaryOp($, assoc, precedence, operator, left = null, right = null) {
  const leftOperand = left ? left : $._expression;
  const rightOperand = right ? right : $._expression;

  const operation = seq(
    field("left", leftOperand),
    field("operator", operator),
    field("right", rightOperand)
  );

  return assoc(precedence, operation);
}
|
|
|
|
/**
 * Builds the rule for a keyword-introduced block section (`do`, `after`,
 * `rescue`, `catch`, `else`): the `start` token, an optional terminator and
 * then, optionally, either stab clauses or plain expressions.
 *
 * @param $ - the grammar rule references
 * @param start - the token that opens the section
 * @returns the sequence rule for the section
 */
function sugarBlock($, start) {
  // Stab clauses separated by terminators
  const stabClauses = sep1(choice($.stab_clause), $._terminator);

  // Expressions separated by terminators, with an optional trailing one
  const expressions = seq(
    sep1(choice($._expression), $._terminator),
    optional($._terminator)
  );

  const content = optional(choice(stabClauses, expressions));

  return seq(start, optional($._terminator), content);
}
|
|
|
|
/**
 * Defines the pair of rules for content quoted with the given delimiters:
 *
 *   * `_quoted_i_{name}` - content that may contain interpolation and
 *     escape sequences
 *   * `_quoted_{name}` - content without interpolation (escape sequences
 *     are still allowed)
 *
 * The content itself comes from the `_quoted_content_i_{name}` and
 * `_quoted_content_{name}` rules and is aliased to `string_content`.
 *
 * @param start - opening delimiter
 * @param end - closing delimiter
 * @param name - delimiter name used in the generated rule names
 * @returns an object with the two rule definitions, to be spread into the
 *   grammar rules
 */
function defineQuoted(start, end, name) {
  const interpolatedRuleName = `_quoted_i_${name}`;
  const plainRuleName = `_quoted_${name}`;
  // TODO: rename the externals to _content
  const interpolatedContentName = `_quoted_content_i_${name}`;
  const plainContentName = `_quoted_content_${name}`;

  return {
    [interpolatedRuleName]: ($) => {
      const fragment = choice(
        alias($[interpolatedContentName], $.string_content),
        $.interpolation,
        $.escape_sequence
      );
      return seq(start, repeat(fragment), end);
    },

    [plainRuleName]: ($) => {
      const fragment = choice(
        alias($[plainContentName], $.string_content),
        // It's always possible to escape the end delimiter
        $.escape_sequence
      );
      return seq(start, repeat(fragment), end);
    },
  };
}
|