Initial parser implementation

2021-09-25 02:23:37 +02:00 · 2021-09-25 02:23:37 +02:00 · dad92d2b87
commit dad92d2b87
parent 3f19b87c70
31 changed files with 415172 additions and 337 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,8 @@
+# Deps
 /node_modules/
-/build/
+
+# Temporary files
 /tmp/
+
+# Temporary files generated by Tree-sitter
+log.html
--- a/binding.gyp
+++ b/binding.gyp
@ -9,7 +9,7 @@
      "sources": [
        "bindings/node/binding.cc",
        "src/parser.c",
-        # If your language uses an external scanner, add it here.
+        "src/scanner.cc"
      ],
      "cflags_c": [
        "-std=c99",
--- a/grammar.js
+++ b/grammar.js
@ -1,7 +1,921 @@
+// Operator precedence:
+// * https://hexdocs.pm/elixir/master/operators.html
+// * https://github.com/elixir-lang/elixir/blob/master/lib/elixir/src/elixir_parser.yrl
+const PREC = {
+  IN_MATCH_OPS: 10,
+  WHEN_OP: 20,
+  TYPE_OP: 30,
+  BAR_OP: 40,
+  ASSOC_OP: 50,
+  CAPTURE_OP: 60,
+  MATCH_OP: 70,
+  OR_OPS: 80,
+  AND_OPS: 90,
+  COMP_OPS: 100,
+  REL_OPS: 110,
+  ARROW_OPS: 120,
+  IN_OPS: 130,
+  XOR_OP: 140,
+  TERNARY_OP: 150,
+  CONCAT_OPS: 160,
+  ADD_OPS: 170,
+  MULT_OPS: 180,
+  POWER_OP: 190,
+  UNARY_OPS: 200,
+  ACCESS: 205,
+  DOT_OP: 210,
+  AT_OP: 220,
+  CAPTURE_OPERAND: 235,
+};
+
+const IN_MATCH_OPS = ["<-", "\\\\"];
+const OR_OPS = ["||", "|||", "or"];
+const AND_OPS = ["&&", "&&&", "and"];
+const COMP_OPS = ["==", "!=", "=~", "===", "!=="];
+const REL_OPS = ["<", ">", "<=", ">="];
+const ARROW_OPS = ["|>", "<<<", ">>>", "<<~", "~>>", "<~", "~>", "<~>", "<|>"];
+const IN_OPS = ["in", "not in"];
+const CONCAT_OPS = ["++", "--", "+++", "---", "..", "<>"];
+const ADD_OPS = ["+", "-"];
+const MULT_OPS = ["*", "/"];
+const UNARY_OPS = ["+", "-", "!", "^", "~~~", "not"];
+
+const ALL_OPS = [
+  ["->", "when", "::", "|", "=>", "&", "=", "^^^", "//", "**", ".", "@"],
+  IN_MATCH_OPS,
+  OR_OPS,
+  AND_OPS,
+  COMP_OPS,
+  REL_OPS,
+  ARROW_OPS,
+  IN_OPS,
+  CONCAT_OPS,
+  ADD_OPS,
+  MULT_OPS,
+  UNARY_OPS,
+].flat();
+
+// Ignore word literals and "=>" which is not a valid atom
+const ATOM_OPERATOR_LITERALS = ALL_OPS.filter(
+  (operator) => !/[a-z]/.test(operator) && operator !== "=>"
+);
+
+// Note that for keywords we use external scanner (KEYWORD_SPECIAL_LITERAL),
+// so it should be kept in sync
+const ATOM_SPECIAL_LITERALS = ["...", "%{}", "{}", "%", "<<>>", "..//"];
+
+// Word tokens used directly in the grammar
+const RESERVED_WORD_TOKENS = [
+  // Operators
+  ["and", "in", "not", "or", "when"],
+  // Literals
+  ["true", "false", "nil"],
+  // Other
+  ["after", "catch", "do", "else", "end", "fn", "rescue"],
+].flat();
+
+const SPECIAL_IDENTIFIERS = [
+  "__MODULE__",
+  "__DIR__",
+  "__ENV__",
+  "__CALLER__",
+  "__STACKTRACE__",
+];
+
+// Numbers
+
+const DIGITS = /[0-9]+/;
+const BIN_DIGITS = /[0-1]+/;
+const OCT_DIGITS = /[0-7]+/;
+const HEX_DIGITS = /[0-9a-fA-F]+/;
+
+const numberDec = sep1(DIGITS, "_");
+const numberBin = seq("0b", sep1(BIN_DIGITS, "_"));
+const numberOct = seq("0o", sep1(OCT_DIGITS, "_"));
+const numberHex = seq("0x", sep1(HEX_DIGITS, "_"));
+
+const integer = choice(numberDec, numberBin, numberOct, numberHex);
+
+const floatScientificPart = seq(/[eE]/, optional(choice("-", "+")), integer);
+const float = seq(numberDec, ".", numberDec, optional(floatScientificPart));
+
+const aliasPart = /[A-Z][_a-zA-Z0-9]*/;
+
 module.exports = grammar({
  name: "elixir",

+  // TODO describe stuff (also, in the separate notes doc, add clarification
+  // on how we use these verbose tokens to avoid needing scanner state)
+  externals: ($) => [
+    $._quoted_content_i_single,
+    $._quoted_content_i_double,
+    $._quoted_content_i_heredoc_single,
+    $._quoted_content_i_heredoc_double,
+    $._quoted_content_i_parenthesis,
+    $._quoted_content_i_curly,
+    $._quoted_content_i_square,
+    $._quoted_content_i_angle,
+    $._quoted_content_i_bar,
+    $._quoted_content_i_slash,
+
+    $._quoted_content_single,
+    $._quoted_content_double,
+    $._quoted_content_heredoc_single,
+    $._quoted_content_heredoc_double,
+    $._quoted_content_parenthesis,
+    $._quoted_content_curly,
+    $._quoted_content_square,
+    $._quoted_content_angle,
+    $._quoted_content_bar,
+    $._quoted_content_slash,
+
+    $._keyword_special_literal,
+    $._atom_start,
+    $._keyword_end,
+
+    $._newline_before_do,
+    $._newline_before_binary_op,
+    // TODO explain this, basically we use newline ignored for newline before comment,
+    // as after the comment there is another newline that we then consider as usual (so
+    // that comments are skipped when considering newlines) <- this is chaotic need a better one
+    $._newline_before_comment,
+
+    // TODO explain this, basically we use this to force unary + and -
+    // if there is no spacing before the operand
+    $._before_unary_op,
+
+    $._not_in,
+  ],
+
+  // TODO include in notes about why using extra for newline before binary op is fine
+  // TODO figure out how "\n" helps with the behaviour in
+  // [
+  //   :a,
+  // ]
+  // and how it generally works with extras
+  extras: ($) => [
+    $.comment,
+    /\s|\\\n/,
+    $._newline_before_binary_op,
+    $._newline_before_comment,
+    "\n",
+  ],
+
+  // TODO check if the parser doesn't compile without each conflict rule,
+  // otherwise it means we don't really use it (I think)
+  conflicts: ($) => [
+    // [$._newline_before_binary_op],
+    [$.binary_operator],
+    [$.keywords],
+    // [$.identifier, $.atom_literal],
+    [$._expression, $._local_call_with_arguments],
+    [
+      $._expression,
+      $._local_call_with_arguments,
+      $._local_call_without_arguments,
+    ],
+
+    [$._remote_call, $._parenthesised_remote_call],
+
+    // stab clause `(x` may be either `(x;y) ->` or `(x, y) ->`
+    // [$.block, $._stab_clause_arguments],
+    [$.block, $._stab_clause_parentheses_arguments],
+    [$.block, $._stab_clause_arguments],
+
+    [$.block, $._stab_clause_arguments_expression],
+
+    // when in stab clause
+    [$.binary_operator, $._stab_clause_arguments_expression],
+
+    [$.tuple, $.map],
+    [$.tuple, $.map_content],
+    [$.operator_identifier, $.stab_clause],
+    [$.unary_operator, $.operator_identifier],
+    // [$.alias],
+    [$.body],
+    // [$.block, $._stab_clause_arguments],
+    // [$.block, $._stab_clause_parentheses_arguments],
+    // [$.block, $._stab_clause_parentheses_arguments],
+    [$.after_block],
+    [$.rescue_block],
+    [$.catch_block],
+    [$.else_block],
+  ],
+
  rules: {
-    source: ($) => "TODO",
+    source: ($) =>
+      seq(
+        optional($._terminator),
+        optional(
+          seq(sep1($._expression, $._terminator), optional($._terminator))
+        )
+      ),
+
+    _terminator: ($) =>
+      prec.right(choice(seq(repeat("\n"), ";"), repeat1("\n"))),
+
+    _expression: ($) =>
+      choice(
+        $.block,
+        $._identifier,
+        $.alias,
+        $.integer,
+        $.float,
+        $.atom,
+        $.string,
+        $.charlist,
+        $.sigil,
+        $.list,
+        $.tuple,
+        $.bitstring,
+        $.map,
+        $.char,
+        $.boolean,
+        $.nil,
+        $.unary_operator,
+        $.binary_operator,
+        $.dot,
+        $.call,
+        $.access_call,
+        $.anonymous_function
+      ),
+
+    block: ($) =>
+      prec(
+        PREC.WHEN_OP,
+        seq(
+          "(",
+          seq(
+            optional($._terminator),
+            optional(
+              seq(
+                sep1(choice($._expression, $.stab_clause), $._terminator),
+                optional($._terminator)
+              )
+            )
+          ),
+          ")"
+        )
+      ),
+
+    _identifier: ($) =>
+      choice($.identifier, $.unused_identifier, $.special_identifier),
+
+    // Note: Elixir does not allow uppercase and titlecase letters
+    // as a variable starting character, but this regex would match
+    // those. This implies we would happily parse those cases, but
+    // since they are not valid Elixir it's unlikely to stumble upon
+    // them. TODO reword
+    // Ref: https://hexdocs.pm/elixir/master/unicode-syntax.html#variables
+    // TODO see if we need this in custom scanner in the end, if we do,
+    // then we may use the generation script from the original repo instead
+    // and make this an external (though I'd check if these custom unicode
+    // functions are efficient, does compiler optimise such checks?)
+    // identifier: ($) => choice(/[\p{ID_Start}][\p{ID_Continue}]*[?!]?/u, "..."),
+    // identifier: ($) => choice(/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}\p{Other_ID_Start}][\p{ID_Continue}]*[?!]?/u, "..."),
+    // identifier: ($) => choice(/[\p{Ll}\p{Lm}\p{Lo}\p{Nl}][\p{ID_Continue}]*[?!]?/u, "..."),
+    //
+    // TODO elaborate, but basically
+    //
+    // we remove uppercase/titlecase letters from ID_Start as elixir does
+    // we remove the subtractions (we cannot express group subtraction in regex),
+    //   but it's fine because at the time of writing these groups only really subtract
+    //   a single character
+    //   Unicode.Set.to_utf8_char "[[[:L:][:Nl:][:Other_ID_Start:]] & [[:Pattern_Syntax:][:Pattern_White_Space:]]]"
+    // we use hardcoded codepoints for \p{Other_ID_Start} since treesitter/js regexp doesn't
+    //   recognise this group
+    //
+    // Other_ID_Start \u1885\u1886\u2118\u212E\u309B\u309C
+    //   (this is the list at the time of writing; it exists for backward compatibility, see https://unicode.org/reports/tr31/#Backward_Compatibility)
+    identifier: ($) =>
+      choice(
+        /[\p{Ll}\p{Lm}\p{Lo}\p{Nl}\u1885\u1886\u2118\u212E\u309B\u309C][\p{ID_Continue}]*[?!]?/u,
+        "..."
+      ),
+
+    unused_identifier: ($) => /_[\p{ID_Continue}]*[?!]?/u,
+
+    special_identifier: ($) => choice(...SPECIAL_IDENTIFIERS),
+
+    // We have a separate rule for single-part alias, so that we
+    // can use it in the keywords rule
+    alias: ($) => choice($._alias_single, $._alias_multi),
+
+    _alias_single: ($) => aliasPart,
+
+    _alias_multi: ($) => token(sep1(aliasPart, /\s*\.\s*/)),
+
+    integer: ($) => token(integer),
+
+    float: ($) => token(float),
+
+    atom: ($) =>
+      seq(
+        $._atom_start,
+        choice(
+          alias($._atom_word_literal, $.atom_literal),
+          alias($._atom_operator_literal, $.atom_literal),
+          alias($._atom_special_literal, $.atom_literal),
+          $._quoted_i_double,
+          $._quoted_i_single
+        )
+      ),
+
+    // TODO comment on the unicode groups here
+    _atom_word_literal: ($) => /[\p{ID_Start}_][\p{ID_Continue}@]*[?!]?/u,
+
+    _atom_operator_literal: ($) => choice(...ATOM_OPERATOR_LITERALS),
+
+    _atom_special_literal: ($) => choice(...ATOM_SPECIAL_LITERALS),
+
+    // Defines $._quoted_i_{name} and $._quoted_{name} rules,
+    // content with and without interpolation respectively
+    ...defineQuoted(`"`, `"`, "double"),
+    ...defineQuoted(`'`, `'`, "single"),
+    ...defineQuoted(`'''`, `'''`, "heredoc_single"),
+    ...defineQuoted(`"""`, `"""`, "heredoc_double"),
+    ...defineQuoted(`(`, `)`, "parenthesis"),
+    ...defineQuoted(`{`, `}`, "curly"),
+    ...defineQuoted(`[`, `]`, "square"),
+    ...defineQuoted(`<`, `>`, "angle"),
+    ...defineQuoted(`|`, `|`, "bar"),
+    ...defineQuoted(`/`, `/`, "slash"),
+
+    string: ($) => choice($._quoted_i_double, $._quoted_i_heredoc_double),
+
+    charlist: ($) => choice($._quoted_i_single, $._quoted_i_heredoc_single),
+
+    interpolation: ($) => seq("#{", $._expression, "}"),
+
+    escape_sequence: ($) =>
+      token(
+        seq(
+          "\\",
+          choice(
+            // Single escaped character
+            /[^ux]/,
+            // Hex byte
+            /x[0-9a-fA-F]{1,2}/,
+            /x{[0-9a-fA-F]+}/,
+            // Unicode code point
+            /u{[0-9a-fA-F]+}/,
+            /u[0-9a-fA-F]{4}/
+          )
+        )
+      ),
+
+    sigil: ($) =>
+      seq(
+        "~",
+        choice(
+          seq(
+            alias(token.immediate(/[a-z]/), $.sigil_name),
+            choice(
+              $._quoted_i_double,
+              $._quoted_i_single,
+              $._quoted_i_heredoc_single,
+              $._quoted_i_heredoc_double,
+              $._quoted_i_parenthesis,
+              $._quoted_i_curly,
+              $._quoted_i_square,
+              $._quoted_i_angle,
+              $._quoted_i_bar,
+              $._quoted_i_slash
+            )
+          ),
+          seq(
+            alias(token.immediate(/[A-Z]/), $.sigil_name),
+            choice(
+              $._quoted_double,
+              $._quoted_single,
+              $._quoted_heredoc_single,
+              $._quoted_heredoc_double,
+              $._quoted_parenthesis,
+              $._quoted_curly,
+              $._quoted_square,
+              $._quoted_angle,
+              $._quoted_bar,
+              $._quoted_slash
+            )
+          )
+        ),
+        optional(alias(token.immediate(/[a-zA-Z]+/), $.sigil_modifiers))
+      ),
+
+    unary_operator: ($) =>
+      choice(
+        unaryOp($, prec, PREC.CAPTURE_OP, "&", $._capture_expression),
+        unaryOp($, prec, PREC.UNARY_OPS, choice(...UNARY_OPS)),
+        unaryOp($, prec, PREC.AT_OP, "@"),
+        // Capture operand like &1 is a special case with higher precedence
+        unaryOp($, prec, PREC.CAPTURE_OPERAND, "&", $.integer)
+      ),
+
+    _capture_expression: ($) =>
+      choice(
+        // TODO should parenthesised expression be generally used (?)
+        // Precedence over block expression
+        prec(PREC.WHEN_OP + 1, seq("(", $._expression, ")")),
+        $._expression
+      ),
+
+    binary_operator: ($) =>
+      choice(
+        binaryOp($, prec.left, PREC.IN_MATCH_OPS, choice(...IN_MATCH_OPS)),
+        binaryOp(
+          $,
+          prec.right,
+          PREC.WHEN_OP,
+          "when",
+          $._expression,
+          choice($._expression, $.keywords)
+        ),
+        binaryOp($, prec.right, PREC.TYPE_OP, "::"),
+        binaryOp(
+          $,
+          prec.right,
+          PREC.BAR_OP,
+          "|",
+          $._expression,
+          choice($._expression, $.keywords)
+        ),
+        binaryOp($, prec.right, PREC.ASSOC_OP, "=>"),
+        binaryOp($, prec.right, PREC.MATCH_OP, "="),
+        binaryOp($, prec.left, PREC.OR_OPS, choice(...OR_OPS)),
+        binaryOp($, prec.left, PREC.AND_OPS, choice(...AND_OPS)),
+        binaryOp($, prec.left, PREC.COMP_OPS, choice(...COMP_OPS)),
+        binaryOp($, prec.left, PREC.REL_OPS, choice(...REL_OPS)),
+        binaryOp($, prec.left, PREC.ARROW_OPS, choice(...ARROW_OPS)),
+        binaryOp($, prec.left, PREC.IN_OPS, choice("in", $._not_in)),
+        binaryOp($, prec.left, PREC.XOR_OP, "^^^"),
+        binaryOp($, prec.right, PREC.TERNARY_OP, "//"),
+        binaryOp($, prec.right, PREC.CONCAT_OPS, choice(...CONCAT_OPS)),
+        binaryOp($, prec.left, PREC.ADD_OPS, choice(...ADD_OPS)),
+        binaryOp($, prec.left, PREC.MULT_OPS, choice(...MULT_OPS)),
+        binaryOp($, prec.left, PREC.POWER_OP, "**"),
+        // Operator with arity
+        binaryOp(
+          $,
+          prec.left,
+          PREC.MULT_OPS,
+          "/",
+          $.operator_identifier,
+          $.integer
+        )
+      ),
+
+    operator_identifier: ($) =>
+      // Operators with the following changes:
+      //   * exclude "=>" since it's not a valid atom/operator identifier anyway (valid only in map)
+      // * we exclude // since it's only valid after ..
+      // * we remove "-" and "+" since they are both unary and binary
+
+      // We use the same precedence as unary operators, so that a sequence
+      // like `& /` is a conflict and is resolved via $.conflicts
+      // (could be be either `& / 2` or `& / / 2`)
+      choice(
+        // Unary operators
+        prec(PREC.CAPTURE_OP, "&"),
+        prec(PREC.UNARY_OPS, choice(...UNARY_OPS)),
+        prec(PREC.AT_OP, "@"),
+        // Binary operators
+        ...IN_MATCH_OPS,
+        "when",
+        "::",
+        "|",
+        "=",
+        ...OR_OPS,
+        ...AND_OPS,
+        ...COMP_OPS,
+        ...REL_OPS,
+        ...ARROW_OPS,
+        "in",
+        $._not_in,
+        "^^",
+        ...CONCAT_OPS,
+        ...MULT_OPS,
+        "**",
+        "->",
+        "."
+      ),
+
+    dot: ($) =>
+      prec(
+        PREC.DOT_OP,
+        seq(choice($._expression), ".", choice($.alias, $.tuple))
+      ),
+
+    keywords: ($) => sep1($.pair, ","),
+
+    pair: ($) => seq($.keyword, $._expression),
+
+    keyword: ($) =>
+      seq(
+        // Tree-sitter doesn't consider ambiguities within individual
+        // tokens (in this case regexps). So both in [a] and [a: 1] it
+        // would always parse "a" as the same node (based on whether
+        // $.identifier or $.atom_literal) is listed first in the rules.
+        // However, since identifiers and alias parts are valid atom
+        // literals, we can list them here, in which case the parser will
+        // consider all paths and pick the valid one.
+        // Also see https://github.com/tree-sitter/tree-sitter/issues/518
+        choice(
+          alias($._atom_word_literal, $.atom_literal),
+          alias($._atom_operator_literal, $.atom_literal),
+          alias($._keyword_special_literal, $.atom_literal),
+          alias($.identifier, $.atom_literal),
+          alias($.unused_identifier, $.atom_literal),
+          alias($.special_identifier, $.atom_literal),
+          alias($._alias_single, $.atom_literal),
+          alias(choice(...RESERVED_WORD_TOKENS), $.atom_literal),
+          $._quoted_i_double,
+          $._quoted_i_single
+        ),
+        $._keyword_end
+      ),
+
+    list: ($) => seq("[", optional($._items_with_trailing_separator), "]"),
+
+    tuple: ($) => seq("{", optional($._items_with_trailing_separator), "}"),
+
+    bitstring: ($) =>
+      seq("<<", optional($._items_with_trailing_separator), ">>"),
+
+    map: ($) => seq("%", optional($.struct), "{", optional($.map_content), "}"),
+
+    struct: ($) =>
+      prec.left(
+        choice(
+          $.alias,
+          $.atom,
+          $._identifier,
+          $.unary_operator,
+          $.dot,
+          alias($._parenthesised_call, $.call)
+        )
+      ),
+
+    map_content: ($) => $._items_with_trailing_separator,
+
+    _items_with_trailing_separator: ($) =>
+      seq(
+        choice(
+          seq(sep1($._expression, ","), optional(seq(",", $.keywords))),
+          $.keywords
+        ),
+        optional(",")
+      ),
+
+    char: ($) => /\?(.|\\.)/,
+
+    boolean: ($) => choice("true", "false"),
+
+    nil: ($) => "nil",
+
+    call: ($) =>
+      choice(
+        $._local_call_with_arguments,
+        $._parenthesised_local_call_with_arguments,
+        $._local_call_without_arguments,
+        $._remote_call,
+        $._parenthesised_remote_call,
+        $._anonymous_call,
+        $._call_on_call
+      ),
+
+    _parenthesised_call: ($) =>
+      choice(
+        $._parenthesised_local_call_with_arguments,
+        $._parenthesised_remote_call,
+        $._anonymous_call,
+        $._call_on_call
+      ),
+
+    _call_on_call: ($) =>
+      prec.left(
+        seq(
+          alias(
+            choice(
+              $._parenthesised_local_call_with_arguments,
+              $._parenthesised_remote_call,
+              $._anonymous_call
+            ),
+            $.call
+          ),
+          // arguments in parentheses
+          // alias($._local_or_remote_arguments, $.arguments),
+          // TODO just make nonimmediate/immediate in the name
+          alias($._anonymous_arguments, $.arguments),
+          optional(seq(optional($._newline_before_do), $.do_block))
+        )
+      ),
+
+    _local_call_with_arguments: ($) =>
+      // Given `x + y` it can be interpreted either as a binary operator
+      // or a call with unary operator. This is an actual ambiguity, so
+      // we use dynamic precedence to penalize call
+      // prec.dynamic(
+      // TODO ideally we would penalize whitespace after unary op,
+      // so that x + y is binary op and x +y is unary op, to reflect
+      // Elixir ast
+      // -1,
+      prec.left(
+        seq(
+          $._identifier,
+          alias($._call_arguments, $.arguments),
+          // TODO include this in notes:
+          // We use external scanner for _newline_before_do because
+          // this way we can lookahead through any whitespace
+          // (especially newlines). We cannot simply use repeat("\n")
+          // and conflict with expression end, because this function
+          // rule has left precedence (so that do-end sticks to the outermost
+          // call), and thus expression end would always be preferred
+          optional(seq(optional($._newline_before_do), $.do_block))
+          // optional($.do_block)
+        )
+        // )
+      ),
+
+    _parenthesised_local_call_with_arguments: ($) =>
+      // Given `x + y` it can be interpreted either as a binary operator
+      // or a call with unary operator. This is an actual ambiguity, so
+      // we use dynamic precedence to penalize call
+      // prec.dynamic(
+      // TODO ideally we would penalize whitespace after unary op,
+      // so that x + y is binary op and x +y is unary op, to reflect
+      // Elixir ast
+      // -1,
+      prec.left(
+        seq(
+          $._identifier,
+          alias($._parenthesised_call_arguments, $.arguments),
+          // TODO include this in notes:
+          // We use external scanner for _newline_before_do because
+          // this way we can lookahead through any whitespace
+          // (especially newlines). We cannot simply use repeat("\n")
+          // and conflict with expression end, because this function
+          // rule has left precedence (so that do-end sticks to the outermost
+          // call), and thus expression end would always be preferred
+          optional(seq(optional($._newline_before_do), $.do_block))
+          // optional($.do_block)
+        )
+        // )
+      ),
+
+    _local_call_without_arguments: ($) =>
+      // We use lower precedence, so given `fun arg do end`
+      // we don't tokenize `arg` as a call
+
+      // we actually need a conflict because of `foo bar do end` vs `foo bar do: 1`
+      // prec(-1,
+      prec.dynamic(-1, seq($._identifier, $.do_block)),
+    // )
+    _remote_call: ($) =>
+      prec.left(
+        seq(
+          alias($._remote_dot, $.dot),
+          optional(alias($._call_arguments, $.arguments)),
+          optional(seq(optional($._newline_before_do), $.do_block))
+          // optional($.do_block)
+        )
+      ),
+
+    _parenthesised_remote_call: ($) =>
+      prec.left(
+        seq(
+          alias($._remote_dot, $.dot),
+          alias($._parenthesised_call_arguments, $.arguments),
+          optional(seq(optional($._newline_before_do), $.do_block))
+          // optional($.do_block)
+        )
+      ),
+
+    _remote_dot: ($) =>
+      prec(
+        PREC.DOT_OP,
+        seq(
+          $._expression,
+          ".",
+          // TODO can also be string, anything else?
+          // compare with the other parser
+          // TODO we don't want to support heredoc though
+          choice(
+            $._identifier,
+            alias(choice(...RESERVED_WORD_TOKENS), $.identifier),
+            $.operator_identifier,
+            alias($._quoted_i_double, $.string),
+            alias($._quoted_i_single, $.charlist)
+          )
+        )
+      ),
+
+    _parenthesised_call_arguments: ($) =>
+      seq(token.immediate("("), optional($._call_arguments), ")"),
+
+    _anonymous_call: ($) =>
+      seq(
+        alias($._anonymous_dot, $.dot),
+        alias($._anonymous_arguments, $.arguments)
+      ),
+
+    _anonymous_dot: ($) => prec(PREC.DOT_OP, seq($._expression, ".")),
+
+    _anonymous_arguments: ($) => seq("(", optional($._call_arguments), ")"),
+
+    _call_arguments: ($) =>
+      // Right precedence ensures that `fun1 fun2 x, y` is treated
+      // as `fun1(fun2(x, y))` and not `fun1(fun2(x), y)`
+      prec.right(
+        seq(
+          choice(
+            seq(
+              sep1($._expression, ","),
+              optional(seq(",", $.keywords, optional(",")))
+            ),
+            seq($.keywords, optional(","))
+          )
+        )
+      ),
+
+    access_call: ($) =>
+      prec(
+        PREC.ACCESS,
+        seq($._expression, token.immediate("["), $._expression, "]")
+      ),
+
+    do_block: ($) =>
+      seq(
+        sugarBlock($, "do"),
+        repeat(
+          choice($.after_block, $.rescue_block, $.catch_block, $.else_block)
+        ),
+        "end"
+      ),
+
+    after_block: ($) => sugarBlock($, "after"),
+
+    rescue_block: ($) => sugarBlock($, "rescue"),
+
+    catch_block: ($) => sugarBlock($, "catch"),
+
+    else_block: ($) => sugarBlock($, "else"),
+
+    // Specify right precedence, so that we consume as much as we can
+    stab_clause: ($) =>
+      prec.right(seq(optional($._stab_clause_left), "->", optional($.body))),
+
+    _stab_clause_left: ($) =>
+      choice(
+        // Note the first option has higher precedence, TODO clarify
+        alias($._stab_clause_parentheses_arguments, $.arguments),
+        // TODO naming/cleanup
+        alias(
+          $._stab_clause_parentheses_arguments_with_guard,
+          $.binary_operator
+        ),
+        alias($._stab_clause_arguments, $.arguments),
+        alias($._stab_clause_arguments_with_guard, $.binary_operator)
+      ),
+
+    _stab_clause_parentheses_arguments: ($) =>
+      // `(1) ->` may be interpreted either as block argument
+      // or argument in parentheses and we use dynamic precedence
+      // to favour the latter
+      prec(
+        PREC.WHEN_OP,
+        prec.dynamic(1, seq("(", optional($._stab_clause_arguments), ")"))
+      ),
+    _stab_clause_parentheses_arguments_with_guard: ($) =>
+      seq(
+        alias($._stab_clause_parentheses_arguments, $.arguments),
+        "when",
+        $._expression
+      ),
+
+    _stab_clause_arguments_with_guard: ($) =>
+      // `a when b ->` may be interpreted either such that `a when b` is an argument
+      // or a guard binary operator with argument `a` and right operand `b`,
+      // we use dynamic precedence to favour the latter
+      prec.dynamic(
+        1,
+        seq(alias($._stab_clause_arguments, $.arguments), "when", $._expression)
+      ),
+    _stab_clause_arguments: ($) =>
+      // TODO this is a variant of _items_with_trailing_separator, cleanup
+      choice(
+        seq(
+          sep1($._stab_clause_arguments_expression, ","),
+          optional(seq(",", $.keywords))
+        ),
+        $.keywords
+      ),
+
+    _stab_clause_arguments_expression: ($) =>
+      // Note here we use the same precedence as when operator,
+      // so we get a conflict and resolve it dynamically
+      prec(PREC.WHEN_OP, $._expression),
+    body: ($) =>
+      seq(
+        choice(
+          seq($._terminator, sep($._expression, $._terminator)),
+          sep1($._expression, $._terminator)
+        ),
+        optional($._terminator)
+      ),
+
+    anonymous_function: ($) =>
+      seq(
+        "fn",
+        optional($._terminator),
+        sep1($.stab_clause, $._terminator),
+        "end"
+      ),
+
+    // A comment may be anywhere, we give it a lower precedence,
+    // so it doesn't intercept sequences such as interpolation
+    comment: ($) => token(prec(-1, seq("#", /.*/))),
  },
 });
+
+function sep1(rule, separator) {
+  return seq(rule, repeat(seq(separator, rule)));
+}
+
+function sep(rule, separator) {
+  return optional(sep1(rule, separator));
+}
+
+function unaryOp($, assoc, precedence, operator, right = null) {
+  return assoc(
+    precedence,
+    // TODO clarify, we use lower precedence, so given `x + y`,
+    // which can be interpreted as either `x + y` or `x(+y)`
+    // we favour the former. The only exception is when
+    // _before_unary_op matches which forces the latter interpretation
+    // in case like `x +y`
+    prec.dynamic(
+      -1,
+      seq(
+        optional($._before_unary_op),
+        field("operator", operator),
+        right || $._expression
+      )
+    )
+  );
+}
+
+function binaryOp($, assoc, precedence, operator, left = null, right = null) {
+  return assoc(
+    precedence,
+    seq(
+      field("left", left || $._expression),
+      field("operator", operator),
+      field("right", right || $._expression)
+    )
+  );
+}
+
+function sugarBlock($, start) {
+  return seq(
+    start,
+    optional($._terminator),
+    optional(
+      choice(
+        sep1(choice($.stab_clause), $._terminator),
+        seq(sep1(choice($._expression), $._terminator), optional($._terminator))
+      )
+    )
+  );
+}
+
+function defineQuoted(start, end, name) {
+  return {
+    [`_quoted_i_${name}`]: ($) =>
+      seq(
+        start,
+        repeat(
+          choice(
+            // TODO rename the extenrals to _content
+            alias($[`_quoted_content_i_${name}`], $.string_content),
+            $.interpolation,
+            $.escape_sequence
+          )
+        ),
+        end
+      ),
+
+    [`_quoted_${name}`]: ($) =>
+      seq(
+        start,
+        repeat(
+          choice(
+            // TODO rename the extenrals to _content
+            alias($[`_quoted_content_${name}`], $.string_content),
+            // It's always possible to escape the end delimiter
+            $.escape_sequence
+          )
+        ),
+        end
+      ),
+  };
+}
--- a/src/grammar.json
+++ b/src/grammar.json
--- a/src/node-types.json
+++ b/src/node-types.json
--- a/src/parser.c
+++ b/src/parser.c
--- a/src/scanner.cc
+++ b/src/scanner.cc
@ -0,0 +1,844 @@
+#include <tree_sitter/parser.h>
+
+namespace {
+
+enum TokenType {  // NOTE(review): presumably has to mirror the order of `externals` in grammar.js - confirm
+  // Quoted content that supports interpolation (see quoted_is_interpol). All QUOTED_CONTENT_*
+  // tokens are expected to be mutually exclusive in valid_symbols (see quoted_token_type).
+  QUOTED_CONTENT_I_SINGLE,
+  QUOTED_CONTENT_I_DOUBLE,
+  QUOTED_CONTENT_I_HEREDOC_SINGLE,
+  QUOTED_CONTENT_I_HEREDOC_DOUBLE,
+  QUOTED_CONTENT_I_PARENTHESIS,
+  QUOTED_CONTENT_I_CURLY,
+  QUOTED_CONTENT_I_SQUARE,
+  QUOTED_CONTENT_I_ANGLE,
+  QUOTED_CONTENT_I_BAR,
+  QUOTED_CONTENT_I_SLASH,
+  // Quoted content for the same delimiters, without interpolation.
+  QUOTED_CONTENT_SINGLE,
+  QUOTED_CONTENT_DOUBLE,
+  QUOTED_CONTENT_HEREDOC_SINGLE,
+  QUOTED_CONTENT_HEREDOC_DOUBLE,
+  QUOTED_CONTENT_PARENTHESIS,
+  QUOTED_CONTENT_CURLY,
+  QUOTED_CONTENT_SQUARE,
+  QUOTED_CONTENT_ANGLE,
+  QUOTED_CONTENT_BAR,
+  QUOTED_CONTENT_SLASH,
+  // Keyword and atom boundaries, set by finish_keyword, finish_atom_start and scan respectively.
+  KEYWORD_SPECIAL_LITERAL,
+  ATOM_START,
+  KEYWORD_END,
+  // Line breaks, told apart in scan by what follows them: `do`, a binary operator or `#`.
+  NEWLINE_BEFORE_DO,
+  NEWLINE_BEFORE_BINARY_OP,
+  NEWLINE_BEFORE_COMMENT,
+  // Zero-width marker emitted in scan for a `+` or `-` that has whitespace before it but not after it.
+  BEFORE_UNARY_OP,
+  // The two-word `not in` operator, scanned as a single token.
+  NOT_IN
+};
+
+// Finds the quoted content token that the parser currently expects.
+//
+// Returns true and stores the token in `token_type` when a quoted
+// content symbol is valid. Returns false, leaving `token_type`
+// untouched, when none is valid or when we appear to be outside quotes.
+bool quoted_token_type(const bool* valid_symbols, TokenType& token_type) {
+  // Quoted symbols are mutually exclusive, so only one of them should
+  // be valid at a time. Seeing several at once means we are parsing
+  // arbitrary code outside quotes, which must not be tokenized as
+  // quoted content.
+  const bool outside_quotes =
+    valid_symbols[QUOTED_CONTENT_I_SINGLE] && valid_symbols[QUOTED_CONTENT_I_DOUBLE];
+
+  if (outside_quotes) {
+    return false;
+  }
+
+  // The quoted content tokens form one contiguous run in TokenType,
+  // from QUOTED_CONTENT_I_SINGLE up to QUOTED_CONTENT_SLASH. Walk it in
+  // declaration order and pick the first valid symbol.
+  for (int symbol = QUOTED_CONTENT_I_SINGLE; symbol <= QUOTED_CONTENT_SLASH; symbol++) {
+    if (valid_symbols[symbol]) {
+      token_type = static_cast<TokenType>(symbol);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Returns the character that closes the quoted construct the given
+// content token belongs to. For heredoc tokens this is the quote
+// character that is repeated in the closing delimiter (the number of
+// repetitions comes from quoted_delimiter_length).
+//
+// Only quoted content tokens are meaningful here. Any other token type
+// yields '\0'; callers are expected to pass a token obtained from
+// quoted_token_type, so that fallback is not reached in practice.
+int32_t quoted_end_delimiter(TokenType token_type) {
+  switch (token_type) {
+    case QUOTED_CONTENT_I_SINGLE:
+    case QUOTED_CONTENT_SINGLE:
+    case QUOTED_CONTENT_I_HEREDOC_SINGLE:
+    case QUOTED_CONTENT_HEREDOC_SINGLE:
+      return '\'';
+
+    case QUOTED_CONTENT_I_DOUBLE:
+    case QUOTED_CONTENT_DOUBLE:
+    case QUOTED_CONTENT_I_HEREDOC_DOUBLE:
+    case QUOTED_CONTENT_HEREDOC_DOUBLE:
+      return '\"';
+
+    case QUOTED_CONTENT_I_PARENTHESIS:
+    case QUOTED_CONTENT_PARENTHESIS:
+      return ')';
+
+    case QUOTED_CONTENT_I_CURLY:
+    case QUOTED_CONTENT_CURLY:
+      return '}';
+
+    case QUOTED_CONTENT_I_SQUARE:
+    case QUOTED_CONTENT_SQUARE:
+      return ']';
+
+    case QUOTED_CONTENT_I_ANGLE:
+    case QUOTED_CONTENT_ANGLE:
+      return '>';
+
+    case QUOTED_CONTENT_I_BAR:
+    case QUOTED_CONTENT_BAR:
+      return '|';
+
+    case QUOTED_CONTENT_I_SLASH:
+    case QUOTED_CONTENT_SLASH:
+      return '/';
+
+    default:
+      // Not a quoted content token. A plain fallback is used instead of
+      // the compiler-specific __builtin_unreachable(), so the scanner
+      // also builds with compilers that lack that builtin.
+      return '\0';
+  }
+}
+
+// Returns how many times the end delimiter character has to be repeated
+// to close the quoted construct: 3 for the heredoc tokens and 1 for
+// every other token.
+uint8_t quoted_delimiter_length(TokenType token_type) {
+  const bool is_heredoc =
+    token_type == QUOTED_CONTENT_I_HEREDOC_SINGLE ||
+    token_type == QUOTED_CONTENT_I_HEREDOC_DOUBLE ||
+    token_type == QUOTED_CONTENT_HEREDOC_SINGLE ||
+    token_type == QUOTED_CONTENT_HEREDOC_DOUBLE;
+
+  if (is_heredoc) {
+    return 3;
+  }
+
+  return 1;
+}
+
+// Tells whether the given token is one of the interpolating quoted
+// content tokens. Those are the QUOTED_CONTENT_I_* enumerators, which
+// are declared as one contiguous run from QUOTED_CONTENT_I_SINGLE to
+// QUOTED_CONTENT_I_SLASH, so a range check is enough.
+bool quoted_is_interpol(TokenType token_type) {
+  return QUOTED_CONTENT_I_SINGLE <= token_type &&
+    token_type <= QUOTED_CONTENT_I_SLASH;
+}
+
+// Tells whether `c` is a space, a horizontal or vertical tab, a line
+// feed, a form feed or a carriage return.
+bool is_whitespace(int32_t c) {
+  switch (c) {
+    case ' ':
+    case '\t':
+    case '\v':
+    case '\n':
+    case '\f':
+    case '\r':
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// Tells whether `c` is whitespace that stays on the same line: a space,
+// a horizontal tab or a vertical tab.
+bool is_inline_whitespace(int32_t c) {
+  switch (c) {
+    case ' ':
+    case '\t':
+    case '\v':
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// Tells whether `c` starts a line break.
+//
+// Both '\n' and '\r' count, so input with \r\n (or lone \r) line
+// endings is recognised too. A \r\n pair is not treated specially: the
+// '\r' is the line break and the '\n' after it is ordinary whitespace,
+// which makes no difference when only the presence of a break matters.
+// A form feed ('\f') is plain whitespace, not a line break.
+bool is_newline(int32_t c) {
+  return c == '\n' || c == '\r';
+}
+
+// Moves the lexer to the next character, keeping the consumed character
+// as part of the token being scanned.
+void advance(TSLexer* lexer) {
+  const bool as_whitespace = false;
+  lexer->advance(lexer, as_whitespace);
+}
+
+// Moves the lexer to the next character, marking the consumed character
+// as whitespace to be skipped.
+void skip(TSLexer* lexer) {
+  const bool as_whitespace = true;
+  lexer->advance(lexer, as_whitespace);
+}
+
+// Finishes scanning ATOM_START once the leading ':' has been consumed.
+// The token always ends right after that first ':'; the characters
+// looked at afterwards only decide whether it is an atom at all.
+//
+// Returns true for `:` followed by a non-whitespace character other
+// than ':' and for the `:::` atom. Returns false for `:` followed by
+// whitespace and for `::` that is not followed by a third ':'.
+bool finish_atom_start(TSLexer* lexer) {
+  lexer->result_symbol = ATOM_START;
+  lexer->mark_end(lexer);
+
+  if (lexer->lookahead != ':') {
+    return !is_whitespace(lexer->lookahead);
+  }
+
+  // We have `::` so far and only `:::` makes it an atom
+  advance(lexer);
+  return lexer->lookahead == ':';
+}
+
+// Tells whether the lexer is at the end of a keyword, that is at a ':'
+// followed by whitespace. The ':' is consumed when present.
+bool is_keyword_end(TSLexer* lexer) {
+  if (lexer->lookahead != ':') {
+    return false;
+  }
+
+  advance(lexer);
+  return is_whitespace(lexer->lookahead);
+}
+
+// Finishes scanning KEYWORD_SPECIAL_LITERAL once its literal part has
+// been consumed. The token ends at the current position, and the result
+// is true only when a keyword end (':' and whitespace) follows.
+bool finish_keyword(TSLexer* lexer) {
+  lexer->result_symbol = KEYWORD_SPECIAL_LITERAL;
+  lexer->mark_end(lexer);
+
+  const bool followed_by_keyword_end = is_keyword_end(lexer);
+  return followed_by_keyword_end;
+}
+
+// Tells whether `c` is an ASCII decimal digit.
+bool is_digit(int32_t c) {
+  return c >= '0' && c <= '9';
+}
+
+// Called right after the characters of an operator have been consumed,
+// to tell whether they really are used as an operator. Returns false
+// when the text is actually
+//
+//   * a keyword, such as `+: `
+//   * an operator identifier with arity, such as `+ / 2`
+//
+// and true otherwise. Characters may be consumed while looking ahead.
+bool is_operator_end(TSLexer* lexer) {
+  // Keyword
+  if (lexer->lookahead == ':') {
+    return !is_keyword_end(lexer);
+  }
+
+  while (is_inline_whitespace(lexer->lookahead)) {
+    advance(lexer);
+  }
+
+  if (lexer->lookahead != '/') {
+    return true;
+  }
+
+  // Operator identifier with arity when a digit follows the slash
+  advance(lexer);
+
+  while (is_whitespace(lexer->lookahead)) {
+    advance(lexer);
+  }
+
+  return !is_digit(lexer->lookahead);
+}
+
+// Characters, other than whitespace, that end a word token. The array
+// is scanned linearly by is_token_end, which takes its length from
+// sizeof, so entries can be added or removed freely.
+const char TOKEN_TERMINATORS[] = {
+  // Operator starts
+  '@', '.', '+', '-', '^', '*', '/', '<', '>', '|', '~', '=', '&', '\\', '%',
+  // Delimiters
+  '{', '}', '[', ']', '(', ')', '"', '\'',
+  // Separators
+  ',', ';',
+  // Comment
+  '#'
+};
+
+// Tells whether `c` ends a word token, which is the case for whitespace
+// and for every character listed in TOKEN_TERMINATORS.
+//
+// Note: this is a heuristic. It is only used to distinguish word
+// operators, and complex Unicode ranges are left out on purpose.
+bool is_token_end(int32_t c) {
+  if (is_whitespace(c)) {
+    return true;
+  }
+
+  for (const char terminator : TOKEN_TERMINATORS) {
+    if (c == terminator) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Consumes `word` character by character. Returns true when the whole
+// word was present; otherwise stops at the first mismatch and returns
+// false, leaving the matching prefix consumed.
+bool consume_word(TSLexer* lexer, const char* word) {
+  for (const char* expected = word; *expected != '\0'; expected++) {
+    if (lexer->lookahead != *expected) {
+      return false;
+    }
+    advance(lexer);
+  }
+
+  return true;
+}
+
+// Consumes the two-word `not in` operator: `not`, one or more inline
+// whitespace characters and `in`. The whitespace is mandatory, so that
+// an identifier such as `notin` is not taken for the operator.
+bool consume_not_in(TSLexer* lexer) {
+  if (!consume_word(lexer, "not")) {
+    return false;
+  }
+
+  if (!is_inline_whitespace(lexer->lookahead)) {
+    return false;
+  }
+
+  while (is_inline_whitespace(lexer->lookahead)) {
+    advance(lexer);
+  }
+
+  return consume_word(lexer, "in");
+}
+
+// Scans quoted content of the given type, which matches any character
+// except for close delimiters, escapes and interpolations. The token
+// ends right before whichever of those comes first, and the result is
+// true only when at least one character of content precedes it.
+bool scan_quoted_content(TSLexer* lexer, TokenType token_type) {
+  const int32_t end_delimiter = quoted_end_delimiter(token_type);
+  const bool supports_interpol = quoted_is_interpol(token_type);
+  const uint8_t delimiter_length = quoted_delimiter_length(token_type);
+
+  lexer->result_symbol = token_type;
+
+  for (bool has_content = false; true; has_content = true) {
+    // Everything consumed so far is content, whatever comes next
+    lexer->mark_end(lexer);
+
+    if (lexer->lookahead == end_delimiter) {
+      // Count how many delimiter characters follow each other, heredocs
+      // need three of them
+      uint8_t length = 1;
+
+      while (length < delimiter_length) {
+        advance(lexer);
+        if (lexer->lookahead == end_delimiter) {
+          length++;
+        } else {
+          break;
+        }
+      }
+
+      if (length == delimiter_length) {
+        return has_content;
+      }
+
+      // An incomplete delimiter is regular content, keep going
+    } else {
+      switch (lexer->lookahead) {
+        case '#':
+          advance(lexer);
+
+          // Stop before `#{`, the interpolation is a separate node
+          if (supports_interpol && lexer->lookahead == '{') {
+            return has_content;
+          }
+
+          break;
+
+        case '\\':
+          if (supports_interpol) {
+            // Stop before the backslash, it starts an escape sequence
+            return has_content;
+          } else {
+            advance(lexer);
+
+            // Without interpolation only the end delimiter is escaped
+            if (lexer->lookahead == end_delimiter) {
+              return has_content;
+            }
+          }
+
+          break;
+
+        case '\0':
+          // The input ended before the closing delimiter
+          return false;
+
+        default:
+          advance(lexer);
+      }
+    }
+  }
+
+  return false;
+}
+
+// Scans the zero-width BEFORE_UNARY_OP marker. Expects the lookahead to
+// be a `+` or `-` that has inline whitespace in front of it. The marker
+// is rejected when the sign is followed by
+//
+//   * the same sign again, as in `++` and `--`
+//   * `>` after `-`, as in `->`
+//   * `:`, which makes it a keyword such as `+: 1`
+//   * `/`, as in the operator with arity `+/2`
+//   * whitespace, as in the binary `x + y`
+bool scan_before_unary_op(TSLexer* lexer) {
+  const int32_t sign = lexer->lookahead;
+
+  // The token ends before the sign, so it stays zero-width
+  lexer->mark_end(lexer);
+  advance(lexer);
+
+  const int32_t next = lexer->lookahead;
+
+  if (next == sign || next == ':' || next == '/') {
+    return false;
+  }
+  if (sign == '-' && next == '>') {
+    return false;
+  }
+  if (is_whitespace(next)) {
+    return false;
+  }
+
+  lexer->result_symbol = BEFORE_UNARY_OP;
+  return true;
+}
+
+// Tells whether a binary operator starts at the current position. The
+// operator has to pass is_operator_end, and word operators additionally
+// have to be followed by a token end.
+bool scan_binary_operator(TSLexer* lexer) {
+  switch (lexer->lookahead) {
+    // &&, &&&
+    case '&':
+      advance(lexer);
+      if (lexer->lookahead != '&') return false;
+      advance(lexer);
+      if (lexer->lookahead == '&') advance(lexer);
+      return is_operator_end(lexer);
+
+    // =, ==, ===, =~, =>
+    case '=':
+      advance(lexer);
+      if (lexer->lookahead == '=') {
+        advance(lexer);
+        if (lexer->lookahead == '=') advance(lexer);
+      } else if (lexer->lookahead == '~' || lexer->lookahead == '>') {
+        advance(lexer);
+      }
+      return is_operator_end(lexer);
+
+    // ::
+    case ':':
+      advance(lexer);
+      if (lexer->lookahead != ':') return false;
+      advance(lexer);
+      // Ignore ::: atom
+      if (lexer->lookahead == ':') return false;
+      return is_operator_end(lexer);
+
+    // ++, +++
+    case '+':
+      advance(lexer);
+      if (lexer->lookahead != '+') return false;
+      advance(lexer);
+      if (lexer->lookahead == '+') advance(lexer);
+      return is_operator_end(lexer);
+
+    // --, ---, ->
+    case '-':
+      advance(lexer);
+      if (lexer->lookahead == '-') {
+        advance(lexer);
+        if (lexer->lookahead == '-') advance(lexer);
+        return is_operator_end(lexer);
+      }
+      if (lexer->lookahead == '>') {
+        advance(lexer);
+        return is_operator_end(lexer);
+      }
+      return false;
+
+    // <, <=, <-, <>, <~, <~>, <|>, <<<, <<~
+    case '<':
+      advance(lexer);
+      if (lexer->lookahead == '=' ||
+          lexer->lookahead == '-' ||
+          lexer->lookahead == '>') {
+        advance(lexer);
+        return is_operator_end(lexer);
+      }
+      if (lexer->lookahead == '~') {
+        advance(lexer);
+        if (lexer->lookahead == '>') advance(lexer);
+        return is_operator_end(lexer);
+      }
+      if (lexer->lookahead == '|') {
+        advance(lexer);
+        if (lexer->lookahead != '>') return false;
+        advance(lexer);
+        return is_operator_end(lexer);
+      }
+      if (lexer->lookahead == '<') {
+        advance(lexer);
+        if (lexer->lookahead != '<' && lexer->lookahead != '~') return false;
+        advance(lexer);
+        return is_operator_end(lexer);
+      }
+      return is_operator_end(lexer);
+
+    // >, >=, >>>
+    case '>':
+      advance(lexer);
+      if (lexer->lookahead == '=') {
+        advance(lexer);
+        return is_operator_end(lexer);
+      }
+      if (lexer->lookahead == '>') {
+        advance(lexer);
+        if (lexer->lookahead != '>') return false;
+        advance(lexer);
+        return is_operator_end(lexer);
+      }
+      return is_operator_end(lexer);
+
+    // ^^^
+    case '^':
+      advance(lexer);
+      if (lexer->lookahead != '^') return false;
+      advance(lexer);
+      if (lexer->lookahead != '^') return false;
+      advance(lexer);
+      return is_operator_end(lexer);
+
+    // !=, !==
+    case '!':
+      advance(lexer);
+      if (lexer->lookahead != '=') return false;
+      advance(lexer);
+      if (lexer->lookahead == '=') advance(lexer);
+      return is_operator_end(lexer);
+
+    // ~>, ~>>
+    case '~':
+      advance(lexer);
+      if (lexer->lookahead != '>') return false;
+      advance(lexer);
+      if (lexer->lookahead == '>') advance(lexer);
+      return is_operator_end(lexer);
+
+    // |, ||, |||, |>
+    case '|':
+      advance(lexer);
+      if (lexer->lookahead == '|') {
+        advance(lexer);
+        if (lexer->lookahead == '|') advance(lexer);
+      } else if (lexer->lookahead == '>') {
+        advance(lexer);
+      }
+      return is_operator_end(lexer);
+
+    // *, **
+    case '*':
+      advance(lexer);
+      if (lexer->lookahead == '*') advance(lexer);
+      return is_operator_end(lexer);
+
+    // /, //
+    case '/':
+      advance(lexer);
+      if (lexer->lookahead == '/') advance(lexer);
+      return is_operator_end(lexer);
+
+    // ., ..
+    case '.':
+      advance(lexer);
+      if (lexer->lookahead == '.') {
+        advance(lexer);
+        // Ignore ... identifier
+        if (lexer->lookahead == '.') return false;
+      }
+      return is_operator_end(lexer);
+
+    // double backslash
+    case '\\':
+      advance(lexer);
+      if (lexer->lookahead != '\\') return false;
+      advance(lexer);
+      return is_operator_end(lexer);
+
+    // when
+    case 'w':
+      return consume_word(lexer, "when") &&
+        is_token_end(lexer->lookahead) && is_operator_end(lexer);
+
+    // and
+    case 'a':
+      return consume_word(lexer, "and") &&
+        is_token_end(lexer->lookahead) && is_operator_end(lexer);
+
+    // or
+    case 'o':
+      return consume_word(lexer, "or") &&
+        is_token_end(lexer->lookahead) && is_operator_end(lexer);
+
+    // in
+    case 'i':
+      return consume_word(lexer, "in") &&
+        is_token_end(lexer->lookahead) && is_operator_end(lexer);
+
+    // not in
+    case 'n':
+      return consume_not_in(lexer) &&
+        is_token_end(lexer->lookahead) && is_operator_end(lexer);
+
+    default:
+      return false;
+  }
+}
+
+// Scans one of the NEWLINE_BEFORE_* tokens. Expects the lookahead to be
+// a line break and at least one of those tokens to be valid. The token
+// covers the line break and all the whitespace after it; what follows
+// the whitespace decides which token it is, if any.
+bool scan_newline(TSLexer* lexer, const bool* valid_symbols) {
+  advance(lexer);
+
+  while (is_whitespace(lexer->lookahead)) {
+    advance(lexer);
+  }
+
+  // Note we include all the whitespace after newline, so that the
+  // parser doesn't have to go through it again
+  lexer->mark_end(lexer);
+
+  if (lexer->lookahead == '#') {
+    lexer->result_symbol = NEWLINE_BEFORE_COMMENT;
+    return true;
+  }
+
+  if (valid_symbols[NEWLINE_BEFORE_DO] && lexer->lookahead == 'd') {
+    lexer->result_symbol = NEWLINE_BEFORE_DO;
+    advance(lexer);
+    if (lexer->lookahead == 'o') {
+      advance(lexer);
+      return is_token_end(lexer->lookahead);
+    }
+    return false;
+  }
+
+  if (valid_symbols[NEWLINE_BEFORE_BINARY_OP]) {
+    lexer->result_symbol = NEWLINE_BEFORE_BINARY_OP;
+    return scan_binary_operator(lexer);
+  }
+
+  return false;
+}
+
+// Scans the literal part of KEYWORD_SPECIAL_LITERAL, which is one of
+// `...`, `..//`, `%`, `%{}`, `{}` and `<<>>`, and hands over to
+// finish_keyword, so a keyword end has to follow the literal.
+bool scan_keyword_special_literal(TSLexer* lexer) {
+  switch (lexer->lookahead) {
+    // ... ..//
+    case '.':
+      advance(lexer);
+      if (lexer->lookahead != '.') return false;
+      advance(lexer);
+      if (lexer->lookahead == '.') {
+        advance(lexer);
+        return finish_keyword(lexer);
+      }
+      if (lexer->lookahead == '/') {
+        advance(lexer);
+        if (lexer->lookahead != '/') return false;
+        advance(lexer);
+        return finish_keyword(lexer);
+      }
+      return false;
+
+    // % %{}
+    case '%':
+      advance(lexer);
+      if (lexer->lookahead != '{') return finish_keyword(lexer);
+      advance(lexer);
+      if (lexer->lookahead != '}') return false;
+      advance(lexer);
+      return finish_keyword(lexer);
+
+    // {}
+    case '{':
+      advance(lexer);
+      if (lexer->lookahead != '}') return false;
+      advance(lexer);
+      return finish_keyword(lexer);
+
+    // <<>>
+    case '<':
+      return consume_word(lexer, "<<>>") && finish_keyword(lexer);
+
+    default:
+      return false;
+  }
+}
+
+// Entry point of the external scanner. Tries to recognise one of the
+// TokenType tokens at the current position, taking into account which
+// of them the parser accepts (`valid_symbols`, indexed by TokenType).
+//
+// Returns true when a token was recognised, in which case the lexer's
+// result_symbol holds its type, and false otherwise.
+//
+// The checks run in this order:
+//
+//   1. quoted content, whenever a quoted content token is expected
+//   2. `:` right at the current position, as keyword end or atom start
+//   3. after skipping inline whitespace: the unary operator marker,
+//      `not in`, the newline tokens, special keyword literals and
+//      finally atom start
+bool scan(TSLexer* lexer, const bool* valid_symbols) {
+  TokenType token_type;
+
+  if (quoted_token_type(valid_symbols, token_type)) {
+    return scan_quoted_content(lexer, token_type);
+  }
+
+  if (lexer->lookahead == ':' &&
+      (valid_symbols[ATOM_START] || valid_symbols[KEYWORD_END])) {
+    advance(lexer);
+
+    if (is_whitespace(lexer->lookahead)) {
+      if (valid_symbols[KEYWORD_END]) {
+        lexer->result_symbol = KEYWORD_END;
+        return true;
+      }
+    } else if (valid_symbols[ATOM_START]) {
+      return finish_atom_start(lexer);
+    }
+
+    return false;
+  }
+
+  bool skipped_whitespace = false;
+
+  while (is_inline_whitespace(lexer->lookahead)) {
+    skipped_whitespace = true;
+    skip(lexer);
+  }
+
+  // The marker only applies to a sign with whitespace in front of it
+  if ((lexer->lookahead == '+' || lexer->lookahead == '-') &&
+      skipped_whitespace && valid_symbols[BEFORE_UNARY_OP]) {
+    return scan_before_unary_op(lexer);
+  }
+
+  // Only emit NOT_IN where the parser accepts it, otherwise leave the
+  // text to the regular lexer
+  if (lexer->lookahead == 'n' && valid_symbols[NOT_IN]) {
+    lexer->result_symbol = NOT_IN;
+    return consume_not_in(lexer) && is_token_end(lexer->lookahead);
+  }
+
+  if (is_newline(lexer->lookahead) && (
+        valid_symbols[NEWLINE_BEFORE_DO] ||
+        valid_symbols[NEWLINE_BEFORE_BINARY_OP] ||
+        valid_symbols[NEWLINE_BEFORE_COMMENT])) {
+    return scan_newline(lexer, valid_symbols);
+  }
+
+  switch (lexer->lookahead) {
+    case '.':
+    case '%':
+    case '{':
+    case '<':
+      return valid_symbols[KEYWORD_SPECIAL_LITERAL] &&
+        scan_keyword_special_literal(lexer);
+
+    // atom start
+    case ':':
+      if (valid_symbols[ATOM_START]) {
+        advance(lexer);
+        return finish_atom_start(lexer);
+      }
+      return false;
+
+    default:
+      return false;
+  }
+}
+
+// Expose the API expected by tree-sitter. The scanner keeps no state: there is no
+// payload, zero bytes are serialized and every scan call is forwarded to scan().
+extern "C" {  // NOTE(review): this block is still inside the unnamed namespace opened above - confirm the symbols get exported
+  void* tree_sitter_elixir_external_scanner_create() {
+    return nullptr;  // nothing to allocate
+  }
+  // Lexing entry point; the payload is not used.
+  bool tree_sitter_elixir_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols) {
+    return scan(lexer, valid_symbols);
+  }
+  // Nothing is written to the buffer, so the reported size is 0.
+  unsigned tree_sitter_elixir_external_scanner_serialize(void* payload, char* buffer) {
+    return 0;
+  }
+  // No-op, there is no state to restore.
+  void tree_sitter_elixir_external_scanner_deserialize(void* payload, const char* buffer, unsigned length) {}
+  // No-op, create() did not allocate anything.
+  void tree_sitter_elixir_external_scanner_destroy(void* payload) {}
+}
+
+}
--- a/test/corpus/comment.txt
+++ b/test/corpus/comment.txt
@ -91,11 +91,7 @@ does not match inside a string

 (source
  (string
-    (string_start)
-    (string_content)
-    (string_end))
+    (string_content))
  (string
-    (string_start)
    (string_content)
-    (interpolation (identifier))
-    (string_end)))
+    (interpolation (identifier))))
--- a/test/corpus/do_end.txt
+++ b/test/corpus/do_end.txt
@ -87,7 +87,7 @@ end
      (call
        (identifier)
        (arguments
-          (identifier)))
+          (identifier))))
    (do_block
      (identifier))))

@ -100,11 +100,47 @@ do
  x
 end

+fun x
+# comment
+do
+  x
+end
+
+fun()
+do
+  x
+end
+
+Mod.fun x
+do
+  x
+end
+
 ---

 (source
  (call
    (identifier)
+    (arguments
+      (identifier))
+    (do_block
+      (identifier)))
+  (call
+    (identifier)
+    (arguments
+      (identifier))
+    (comment)
+    (do_block
+      (identifier)))
+  (call
+    (identifier)
+    (arguments)
+    (do_block
+      (identifier)))
+  (call
+    (dot
+      (alias)
+      (identifier))
    (arguments
      (identifier))
    (do_block
@ -265,7 +301,7 @@ end
    (do_block
      (stab_clause
        (arguments
-          (integer))
+          (identifier))
        (body
          (identifier)
          (identifier))))))
@ -318,8 +354,7 @@ end
            (identifier)
            (list)))
        (body
-          (atom
-            (atom_literal)))))))
+          (identifier))))))

 =====================================
 stab clause / with guard / multiple arguments
@ -344,8 +379,7 @@ end
            (identifier)
            (list)))
        (body
-          (atom
-            (atom_literal)))))))
+          (identifier))))))

 =====================================
 stab clause / with guard / arguments in parentheses
@ -370,8 +404,7 @@ end
            (identifier)
            (list)))
        (body
-          (atom
-            (atom_literal)))))))
+          (identifier))))))

 =====================================
 stab clause / with guard / multiple guards
@ -392,16 +425,152 @@ end
          (arguments
            (identifier))
          (binary_operator
+            (binary_operator
+              (identifier)
+              (integer))
+            (binary_operator
+              (identifier)
+              (integer))))
+        (body
+          (identifier))))))
+
+=====================================
+stab clause / edge cases / no stab
+=====================================
+
+foo do
+  a when a
+end
+
+foo do
+  ([])
+end
+
+---
+
+(source
+  (call
+    (identifier)
+    (do_block
+      (binary_operator
+        (identifier)
+        (identifier))))
+  (call
+    (identifier)
+    (do_block
+      (block
+        (list)))))
+
+=====================================
+stab clause / edge cases / "when" in arguments
+=====================================
+
+foo do
+  a when b, c when d == e -> 1
+end
+
+---
+
+(source
+  (call
+    (identifier)
+    (do_block
+      (stab_clause
+        (binary_operator
+          (arguments
            (binary_operator
              (identifier)
              (identifier))
+            (identifier))
+          (binary_operator
+            (identifier)
+            (identifier)))
+        (body
+          (integer))))))
+
+=====================================
+stab clause / edge cases / block argument
+=====================================
+
+foo do
+  (x; y) -> 1
+  ((x; y)) -> 1
+end
+
+---
+
+(source
+  (call
+    (identifier)
+    (do_block
+      (stab_clause
+        (arguments
+          (block
+            (identifier)
+            (identifier)))
+        (body
+          (integer)))
+      (stab_clause
+        (arguments
+          (block
+            (identifier)
+            (identifier)))
+        (body
+          (integer))))))
+
+=====================================
+stab clause / edge cases / operator with lower precedence than "when"
+=====================================
+
+foo do
+  x <- y when x -> y
+end
+
+foo do
+  (x <- y) when x -> y
+end
+
+---
+
+(source
+  (call
+    (identifier)
+    (do_block
+      (stab_clause
+        (arguments
+          (binary_operator
+            (identifier)
            (binary_operator
              (identifier)
              (identifier))))
        (body
-          (atom
-            (atom_literal)))))))
+          (identifier)))))
+  (call
+    (identifier)
+    (do_block
+      (stab_clause
+        (binary_operator
+          (arguments
+            (binary_operator
+              (identifier)
+              (identifier)))
+          (identifier))
+        (body
+          (identifier))))))

+=====================================
+stab clause / edge cases / empty
+=====================================
+
+fun do->end
+
+---
+
+(source
+  (call
+    (identifier)
+    (do_block
+      (stab_clause))))

 =====================================
 pattern matching
@ -424,8 +593,9 @@ end
              (identifier)
              (identifier))))
        (body
-          (atom
-            (atom_literal)))))))
+          (tuple
+            (identifier)
+            (identifier)))))))

 =====================================
 child blocks / after
@ -578,3 +748,33 @@ end
            (identifier))
          (body
            (identifier)))))))
+
+=====================================
+child blocks / keyword pattern with child block start token
+=====================================
+
+fun do
+  x
+after
+after
+  after: 1 -> y
+end
+
+---
+
+(source
+  (call
+    (identifier)
+    (do_block
+      (identifier)
+      (after_block)
+      (after_block
+        (stab_clause
+          (arguments
+            (keywords
+              (pair
+                (keyword
+                  (atom_literal))
+                (integer))))
+          (body
+            (identifier)))))))
--- a/test/corpus/edge_syntax.txt
+++ b/test/corpus/edge_syntax.txt
@ -5,13 +5,76 @@ operator with arity (valid and supported by IEx.Helpers.h)
 ::/2
@ / 1
 & / 1
+not / 1
+not in / 2
+* / 2
+h +/2

 ---

 (source
  (binary_operator
    (operator_identifier)
-    (integer)))
+    (integer))
+  (binary_operator
+    (operator_identifier)
+    (integer))
+  (binary_operator
+    (operator_identifier)
+    (integer))
+  (binary_operator
+    (operator_identifier)
+    (integer))
+  (binary_operator
+    (operator_identifier)
+    (integer))
+  (binary_operator
+    (operator_identifier)
+    (integer))
+  (call
+    (identifier)
+    (arguments
+      (binary_operator
+        (operator_identifier)
+        (integer)))))
+
+=====================================
+stab and slash ambiguity
+=====================================
+
+(-> / 2)
+(-> / / 2)
+
+---
+
+(source
+  (block
+    (binary_operator
+      (operator_identifier)
+      (integer)))
+  (block
+    (stab_clause
+      (body
+        (binary_operator
+          (operator_identifier)
+          (integer))))))
+
+=====================================
+unary operator and slash ambiguity
+=====================================
+
+& / 2
+& / / 2
+---
+
+(source
+  (binary_operator
+    (operator_identifier)
+    (integer))
+  (unary_operator
+    (binary_operator
+      (operator_identifier)
+      (integer))))

 =====================================
 map with identifiers
@ -54,16 +117,3 @@ def Mod.fun(x), do: 1
          (keyword
            (atom_literal))
          (integer))))))
-
-=====================================
-[error] arrow outside of map
-=====================================
-
-a => b
-
---
-
-(source
-  (identifier)
-  (ERROR
-    (identifier)))
--- a/test/corpus/expression/anonymous_function.txt
+++ b/test/corpus/expression/anonymous_function.txt
@ -15,6 +15,7 @@ fn () -> 1 end
        (integer))))
  (anonymous_function
    (stab_clause
+      (arguments)
      (body
        (integer)))))

@ -178,8 +179,7 @@ end
          (atom
            (atom_literal))))
      (body
-        (atom
-          (atom_literal))))))
+        (boolean)))))

 =====================================
 with guard / one argument
@ -201,8 +201,7 @@ end
          (identifier)
          (list)))
      (body
-        (atom
-          (atom_literal))))))
+        (identifier)))))

 =====================================
 with guard / multiple arguments
@ -225,8 +224,7 @@ end
          (identifier)
          (list)))
      (body
-        (atom
-          (atom_literal))))))
+        (identifier)))))

 =====================================
 with guard / arguments in parentheses
@ -249,8 +247,7 @@ end
          (identifier)
          (list)))
      (body
-        (atom
-          (atom_literal))))))
+        (identifier)))))

 =====================================
 with guard / multiple guards
@ -271,13 +268,12 @@ end
        (binary_operator
          (binary_operator
            (identifier)
-            (identifier))
+            (integer))
          (binary_operator
            (identifier)
-            (identifier))))
+            (integer))))
      (body
-        (atom
-          (atom_literal))))))
+        (identifier)))))

 =====================================
 pattern matching
@ -299,8 +295,9 @@ end
            (identifier)
            (identifier))))
      (body
-        (atom
-          (atom_literal))))
+        (tuple
+          (identifier)
+          (identifier))))
    (stab_clause
      (binary_operator
        (arguments
@ -312,12 +309,7 @@ end
                    (atom_literal))
                  (identifier))))))
        (binary_operator
-          (binary_operator
-            (identifier)
-            (identifier))
-          (binary_operator
-            (identifier)
-            (identifier))))
+          (identifier)
+          (integer)))
      (body
-        (atom
-          (atom_literal))))))
+        (integer)))))
--- a/test/corpus/expression/block.txt
+++ b/test/corpus/expression/block.txt
@ -94,3 +94,30 @@ trailing semicolon
  (block
    (integer)
    (integer)))
+
+=====================================
+stab clauses
+=====================================
+
+(x -> x; y -> y
+ z -> z)
+
+---
+
+(source
+  (block
+    (stab_clause
+      (arguments
+        (identifier))
+      (body
+        (identifier)))
+    (stab_clause
+      (arguments
+        (identifier))
+      (body
+        (identifier)))
+    (stab_clause
+      (arguments
+        (identifier))
+      (body
+        (identifier)))))
--- a/test/corpus/expression/call.txt
+++ b/test/corpus/expression/call.txt
@ -46,7 +46,9 @@ local call / arguments without parentheses
 =====================================

 fun a
+fun {}
 fun [1, 2], option: true, other: 5
+fun +: 1

 ---

@ -55,6 +57,45 @@ fun [1, 2], option: true, other: 5
    (identifier)
    (arguments
      (identifier)))
+  (call
+    (identifier)
+    (arguments
+      (tuple)))
+  (call
+    (identifier)
+    (arguments
+      (list
+        (integer)
+        (integer))
+      (keywords
+        (pair
+          (keyword
+            (atom_literal))
+          (boolean))
+        (pair
+          (keyword
+            (atom_literal))
+          (integer)))))
+    (call
+      (identifier)
+      (arguments
+        (keywords
+          (pair
+            (keyword
+              (atom_literal))
+            (integer))))))
+
+=====================================
+local call / arguments without parentheses / multiline
+=====================================
+
+fun [1, 2],
+  option: true,
+  other: 5
+
+---
+
+(source
  (call
    (identifier)
    (arguments
@ -92,7 +133,8 @@ outer_fun(inner_fun(a))
 local call / nested without parentheses (right associativity)
 =====================================

-outer_fun inner_fun a
+outer_fun inner_fun a, b
+outer_fun inner_fun do: 1

 ---

@ -103,13 +145,24 @@ outer_fun inner_fun a
      (call
        (identifier)
        (arguments
-          (identifier))))))
+          (identifier)
+          (identifier)))))
+  (call
+    (identifier)
+    (arguments
+      (call
+        (identifier)
+        (arguments
+          (keywords
+            (pair
+              (keyword
+                (atom_literal))
+              (integer))))))))

 =====================================
 local call / precedence with operator
 =====================================

-fun +1
 outer_fun 1 + 1
 1 + inner_fun 1
 outer_fun 1 + inner_fun 1
@ -118,11 +171,6 @@ fun 1, 2 |> other_fun
 ---

 (source
-  (call
-    (identifier)
-    (arguments
-      (unary_operator
-        (integer))))
  (call
    (identifier)
    (arguments
@ -337,15 +385,15 @@ Mod.outer_fun 1 + Mod.inner_fun 1
      (dot
        (alias)
        (identifier))
-    (arguments
-      (binary_operator
-        (integer)
-        (call
-          (dot
-            (alias)
-            (identifier))
-          (arguments
-            (integer)))))))
+      (arguments
+        (binary_operator
+          (integer)
+          (call
+            (dot
+              (alias)
+              (identifier))
+            (arguments
+              (integer)))))))

 =====================================
 remote call / treats nonimmediate parentheses as a block argument
@ -410,7 +458,15 @@ Mod.'fun'(a)
  (call
    (dot
      (alias)
-      (identifier))
+      (string
+        (string_content)))
+    (arguments
+      (identifier)))
+  (call
+    (dot
+      (alias)
+      (charlist
+        (string_content)))
    (arguments
      (identifier))))

@ -520,11 +576,12 @@ Mod.outer_fun mid_fun inner_fun.(a)
    (arguments
      (call
        (identifier)
-        (call
-          (dot
-            (identifier))
-          (arguments
-            (identifier)))))))
+        (arguments
+          (call
+            (dot
+              (identifier))
+            (arguments
+              (identifier))))))))

 =====================================
 identifier call
@ -599,8 +656,8 @@ range call
          (binary_operator
            (integer)
            (integer))
-          (integer))))
-    (identifier)))
+          (integer)))
+      (identifier))))

 =====================================
 multi-expression block call
@ -774,14 +831,22 @@ map [key]
        (identifier)))))

 =====================================
-access syntax / precedence over dot call
+access syntax / precedence with dot call
 =====================================

+map.map[:key]
 map[:mod].fun

 ---

 (source
+  (access_call
+    (call
+      (dot
+        (identifier)
+        (identifier)))
+    (atom
+      (atom_literal)))
  (call
    (dot
      (access_call
@ -790,6 +855,91 @@ map[:mod].fun
          (atom_literal)))
      (identifier))))

+=====================================
+access syntax / precedence with operators
+=====================================
+
+-x[:key]
+@x[:key]
+&x[:key]
+&1[:key]
+
+---
+
+(source
+  (unary_operator
+    (access_call
+      (identifier)
+      (atom
+        (atom_literal))))
+  (access_call
+    (unary_operator
+      (identifier))
+    (atom
+      (atom_literal)))
+  (unary_operator
+    (access_call
+      (identifier)
+      (atom
+        (atom_literal))))
+  (access_call
+    (unary_operator
+      (integer))
+    (atom
+      (atom_literal))))
+
+=====================================
+double parenthesised call
+=====================================
+
+fun()()
+fun() ()
+fun(1)(1)
+Mod.fun()()
+fun.()()
+
+unquote(name)()
+
+---
+
+(source
+  (call
+    (call
+      (identifier)
+      (arguments))
+    (arguments))
+  (call
+    (call
+      (identifier)
+      (arguments))
+    (arguments))
+  (call
+    (call
+      (identifier)
+      (arguments
+        (integer)))
+    (arguments
+      (integer)))
+  (call
+    (call
+      (dot
+        (alias)
+        (identifier))
+      (arguments))
+    (arguments))
+  (call
+    (call
+      (dot
+        (identifier))
+      (arguments))
+    (arguments))
+  (call
+    (call
+      (identifier)
+      (arguments
+        (identifier)))
+    (arguments)))
+
 =====================================
 [error] leading argument separator
 =====================================
@ -804,3 +954,18 @@ fun(, a)
    (arguments
      (ERROR)
      (identifier))))
+
+=====================================
+[error] trailing argument separator
+=====================================
+
+fun(a,)
+
+---
+
+(source
+  (call
+    (identifier)
+    (arguments
+      (identifier)
+      (ERROR))))
--- a/test/corpus/expression/capture.txt
+++ b/test/corpus/expression/capture.txt
@ -23,6 +23,7 @@ anonymous function
        (integer))))
  (unary_operator
    (call
+      (identifier)
      (arguments
        (unary_operator
          (integer))
@ -45,22 +46,22 @@ argument call
    (call
      (dot
        (unary_operator
-          (integer)
-        (identifier)))))
+          (integer))
+        (identifier))))
  (unary_operator
    (call
      (dot
        (unary_operator
-          (integer)
-        (identifier)))))
+          (integer))
+        (identifier))))
  (unary_operator
    (call
      (dot
        (unary_operator
-          (integer)
-        (arguments
-          (unary_operator
-            (integer))))))))
+          (integer)))
+      (arguments
+        (unary_operator
+          (integer))))))

 =====================================
 remote MFA
--- a/test/corpus/expression/operator.txt
+++ b/test/corpus/expression/operator.txt
@ -16,27 +16,21 @@ not arg
 ---

 (source
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier))
-  (unary_operator
-    (identifier)))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier)))

 =====================================
 binary left associative
 =====================================

+a ** b ** c
+
 a * b * c
 a / b / c

@ -115,6 +109,7 @@ a \\ b \\ c
  (binary_operator (binary_operator (identifier) (identifier)) (identifier))
  (binary_operator (binary_operator (identifier) (identifier)) (identifier))
  (binary_operator (binary_operator (identifier) (identifier)) (identifier))
+  (binary_operator (binary_operator (identifier) (identifier)) (identifier))
  (binary_operator (binary_operator (identifier) (identifier)) (identifier)))

 =====================================
@ -190,6 +185,7 @@ a - b ++ c
 a = b <<< c

 a + b * c - d
+a ** b + c ** d

 ---

@ -220,7 +216,14 @@ a + b * c - d
      (binary_operator
        (identifier)
        (identifier)))
-    (identifier)))
+    (identifier))
+  (binary_operator
+    (binary_operator
+      (identifier)
+      (identifier))
+    (binary_operator
+      (identifier)
+      (identifier))))

 =====================================
 precedence determined by parentheses
@ -234,8 +237,9 @@ precedence determined by parentheses

 (source
  (binary_operator
-    (unary_operator
-      (identifier))
+    (block
+      (unary_operator
+        (identifier)))
    (identifier))
  (binary_operator
    (block
@ -248,25 +252,413 @@ precedence determined by parentheses
        (identifier)))))

 =====================================
-multiline
+"not in" spacing
 =====================================

+a not    in b
+
+---
+
+(source
+  (binary_operator
+    (identifier)
+    (identifier)))
+
+=====================================
+"not in" boundary
+=====================================
+
+fun not inARG
+
+---
+
+(source
+  (call
+    (identifier)
+    (arguments
+      (unary_operator
+        (identifier)))))
+
+=====================================
+multiline / unary
+=====================================
+
+@
+arg
+
++
+arg
+
 -
-x
+arg
+
+!
+arg
+
+^
+arg
+
+not
+arg
+
+~~~
+arg
+
+&
+arg
+
+---
+
+(source
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier))
+  (unary_operator (identifier)))
+
+=====================================
+multiline / binary
+=====================================
+
+a
+**
+b
+
+a
+*
+b
+
+a
+/
+b
+
+a
++
+b
+
+a
+--
+b
+
+a
+++
+b
+
+a
+---
+b
+
+a
+..
+b
+
+a
+<>
+b
+
+
+a
+^^^
+b
+
+a
+in
+b
+
+a
+not in
+b
+
+a
+|>
+b
+
+a
+<<<
+b
+
+a
+>>>
+b
+
+a
+<<~
+b
+
+a
+~>>
+b
+
+a
+<~
+b
+
+a
+~>
+b
+
+a
+<~>
+b
+
+a
+<|>
+b
+
+a
+<
+b
+
+a
+>
+b
+
+a
+<=
+b
+
+a
+>=
+b
+
+a
+==
+b
+
+a
+!=
+b
+
+a
+=~
+b
+
+a
+===
+b
+
+a
+!==
+b
+
+a
+&&
+b
+
+a
+&&&
+b
+
+a
+and
+b
+
+a
+||
+b
+
+a
+|||
+b
+
+a
+or
+b
+
+a
+=
+b
+
+a
+|
+b
+
+a
+::
+b
+
+a
+when
+b
+
+a
+<-
+b
+
+a
+\\
+b
+
+---
+
+(source
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier))
+  (binary_operator (identifier) (identifier)))
+
+=====================================
+multiline / unary over binary
+=====================================
+
+a
++
+b
+
+a
+-
+b
+
+---
+
+(source
+  (identifier)
+  (unary_operator
+    (identifier))
+  (identifier)
+  (unary_operator
+    (identifier)))
+
+=====================================
+multiline / right operands
+=====================================

 x
 not in
 [y]

+x
+not in[y]
+
+:a
++:b
+
+:a++
+:b
+
 ---

 (source
-  (unary_operator
-    (identifier))
  (binary_operator
    (identifier)
    (list
-      (identifier))))
+      (identifier)))
+  (binary_operator
+    (identifier)
+    (list
+      (identifier)))
+  (binary_operator
+    (atom
+      (atom_literal))
+    (atom
+      (atom_literal)))
+  (binary_operator
+    (atom
+      (atom_literal))
+    (atom
+      (atom_literal))))
+
+=====================================
+multiline / unary over binary (precedence)
+=====================================
+
+x
+-
+y
+
+x
++
+y
+
+---
+
+(source
+  (identifier)
+  (unary_operator
+    (identifier))
+  (identifier)
+  (unary_operator
+    (identifier)))
+
+=====================================
+plus minus
+=====================================
+
+x+y
+x + y
+x+ y
+
+x +y
+x +y +z
+
+
+---
+
+(source
+  (binary_operator
+    (identifier)
+    (identifier))
+  (binary_operator
+    (identifier)
+    (identifier))
+  (binary_operator
+    (identifier)
+    (identifier))
+  (call
+    (identifier)
+    (arguments
+      (unary_operator
+        (identifier))))
+  (call
+    (identifier)
+    (arguments
+      (unary_operator
+        (call
+          (identifier)
+          (arguments
+            (unary_operator
+              (identifier))))))))

 =====================================
 stepped range
--- a/test/corpus/expression/sigil.txt
+++ b/test/corpus/expression/sigil.txt
@ -87,7 +87,7 @@ nested interpolation
        (sigil_name)
        (string_content)
        (interpolation
-          (identifier))))
+          (integer))))
    (string_content)))

 =====================================
@ -126,7 +126,7 @@ escape sequence
 escaped interpolation
 =====================================

-~s{\#{1}}
+~s/\#{1}/

 ---

@ -229,6 +229,7 @@ modifiers
    (sigil_modifiers))
  (sigil
    (sigil_name)
+    (string_content)
    (sigil_modifiers)))

 =====================================
@ -240,7 +241,7 @@ modifiers
 ---

 (source
-  (ERROR)
-  (call
-    (string
-      (string_content))))
+  (sigil
+    (sigil_name)
+    (ERROR)
+    (string_content)))
--- a/test/corpus/integration/function_definition.txt
+++ b/test/corpus/integration/function_definition.txt
@ -29,8 +29,7 @@ end
  (call
    (identifier)
    (arguments
-      (call
-        (identifier)))
+      (identifier))
    (do_block)))

 =====================================
@ -48,9 +47,9 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)))))
+        (identifier)
+        (arguments
+          (identifier))))
    (do_block
      (identifier))))

@ -69,9 +68,9 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)))))
+        (identifier)
+        (arguments
+          (identifier))))
    (do_block
      (identifier))))

@ -90,10 +89,10 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)
-            (identifier)))))
+        (identifier)
+        (arguments
+          (identifier)
+          (identifier))))
    (do_block
      (binary_operator
        (identifier)
@ -114,10 +113,10 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)
-            (identifier)))))
+        (identifier)
+        (arguments
+          (identifier)
+          (identifier))))
    (do_block
      (binary_operator
        (identifier)
@ -142,12 +141,12 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
+        (identifier)
+        (arguments
+          (identifier)
+          (binary_operator
            (identifier)
-            (binary_operator
-              (identifier)
-              (integer))))))
+            (integer)))))
    (do_block
      (binary_operator
        (identifier)
@ -156,12 +155,12 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
+        (identifier)
+        (arguments
+          (identifier)
+          (binary_operator
            (identifier)
-            (binary_operator
-              (identifier)
-              (integer))))))
+            (integer)))))
    (do_block
      (binary_operator
        (identifier)
@ -181,24 +180,25 @@ def fun(x), do: x
    (identifier)
    (arguments
      (call
-        (identifier)))
-    (keywords
-      (pair
-        (keyword
-          (atom_literal))
-        (integer))))
+        (identifier)
+        (arguments))
+      (keywords
+        (pair
+          (keyword
+            (atom_literal))
+          (integer)))))
  (call
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)))))
-    (keywords
-      (pair
-        (keyword
-          (atom_literal))
-        (identifier)))))
+        (identifier)
+        (arguments
+          (identifier)))
+      (keywords
+        (pair
+          (keyword
+            (atom_literal))
+          (identifier))))))

 =====================================
 def / pattern matching
@ -244,9 +244,9 @@ end
    (arguments
      (binary_operator
        (call
-          (identifier
-            (arguments
-              (identifier))))
+          (identifier)
+          (arguments
+            (identifier)))
        (binary_operator
          (identifier)
          (integer))))
@ -269,9 +269,9 @@ end
    (arguments
      (binary_operator
        (call
-          (identifier
-            (arguments
-              (identifier))))
+          (identifier)
+          (arguments
+            (identifier)))
        (binary_operator
          (binary_operator
            (identifier)
@ -297,9 +297,9 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)))))
+        (identifier)
+        (arguments
+          (identifier))))
    (do_block
      (identifier))))

@ -320,9 +320,9 @@ end
    (identifier)
    (arguments
      (call
-        (identifier
-          (arguments
-            (identifier)))))
+        (identifier)
+        (arguments
+          (identifier))))
    (do_block
      (call
        (identifier)
@ -347,9 +347,9 @@ defguard is_even(term) when is_integer(term) and rem(term, 2) == 0
    (arguments
      (binary_operator
        (call
-          (identifier
-            (arguments
-              (identifier))))
+          (identifier)
+          (arguments
+            (identifier)))
        (binary_operator
          (call
            (identifier)
@ -362,3 +362,33 @@ defguard is_even(term) when is_integer(term) and rem(term, 2) == 0
                (identifier)
                (integer)))
            (integer)))))))
+
+=====================================
+def in macro
+=====================================
+
+def unquote(name)(unquote_splicing(args)) do
+  unquote(compiled)
+end
+
+---
+
+(source
+  (call
+    (identifier)
+    (arguments
+      (call
+        (call
+          (identifier)
+          (arguments
+            (identifier)))
+        (arguments
+          (call
+            (identifier)
+            (arguments
+              (identifier))))))
+    (do_block
+      (call
+        (identifier)
+        (arguments
+          (identifier))))))
--- a/test/corpus/integration/kernel.txt
+++ b/test/corpus/integration/kernel.txt
@ -14,14 +14,14 @@ for n <- [1, 2], do: n * 2
        (identifier)
        (list
          (integer)
-          (integer))))
+          (integer)))
      (keywords
        (pair
          (keyword
            (atom_literal))
          (binary_operator
            (identifier)
-            (integer))))))
+            (integer)))))))

 =====================================
 for / enumerable / with options and block
@ -42,7 +42,8 @@ end
        (call
          (dot
            (alias)
-            (identifier))))
+            (identifier))
+          (arguments)))
      (keywords
        (pair
          (keyword
@ -50,7 +51,8 @@ end
          (call
            (dot
              (alias)
-              (identifier))))))
+              (identifier))
+            (arguments)))))
    (do_block
      (call
        (dot
@ -71,7 +73,7 @@ for <<c <- " hello world ">>, c != ?\s, into: "", do: <<c>>
  (call
    (identifier)
    (arguments
-      (binary
+      (bitstring
        (binary_operator
          (identifier)
          (string
@ -83,12 +85,11 @@ for <<c <- " hello world ">>, c != ?\s, into: "", do: <<c>>
        (pair
          (keyword
            (atom_literal))
-          (string
-            (string_content)))
+          (string))
        (pair
          (keyword
            (atom_literal))
-          (binary
+          (bitstring
            (identifier)))))))

 =====================================
--- a/test/corpus/integration/spec.txt
+++ b/test/corpus/integration/spec.txt
@ -38,13 +38,17 @@ with type parentheses
            (identifier)
            (arguments
              (call
-                (identifier))
+                (identifier)
+                (arguments))
              (call
-                (identifier))
+                (identifier)
+                (arguments))
              (call
-                (identifier))))
+                (identifier)
+                (arguments))))
          (call
-            (identifier)))))))
+            (identifier)
+            (arguments)))))))

 =====================================
 with literals
@ -68,8 +72,8 @@ with literals
                  (keywords
                    (pair
                      (keyword
-                        (atom_literal)
-                      (identifier))))))))
+                        (atom_literal))
+                      (identifier)))))))
          (binary_operator
            (tuple
              (atom
@ -97,14 +101,16 @@ with function reference
          (call
            (identifier)
            (arguments
-              (stab_clause
-                (body
-                  (identifier)))
-              (stab_clause
-                (arguments
-                  (identifier))
-                (body
-                  (identifier)))))
+              (block
+                (stab_clause
+                  (body
+                    (identifier))))
+              (block
+                (stab_clause
+                  (arguments
+                    (identifier))
+                  (body
+                    (identifier))))))
          (identifier))))))

 =====================================
@ -127,11 +133,13 @@ with remote type
              (call
                (dot
                  (alias)
-                  (identifier)))))
+                  (identifier))
+                (arguments))))
          (call
            (dot
              (alias)
-              (identifier))))))))
+              (identifier))
+            (arguments)))))))

 =====================================
 with type guard
@ -208,10 +216,11 @@ nonempty list
      (identifier)
      (arguments
        (binary_operator
-          (identifier)
+          (call
+            (identifier)
+            (arguments))
          (list
-            (call
-              (identifier))
+            (identifier)
            (identifier)))))))

 =====================================
--- a/test/corpus/term/alias.txt
+++ b/test/corpus/term/alias.txt
@ -8,7 +8,6 @@ AZ_az_19_
 ---

 (source
-  (alias)
  (alias)
  (alias))

@ -21,6 +20,22 @@ Mod.Child.Child

 ---

+(source
+  (alias)
+  (alias))
+
+=====================================
+spacing
+=====================================
+
+Mod . Child
+
+Mod
+.
+Child
+
+---
+
 (source
  (alias)
  (alias))
@ -72,17 +87,12 @@ __MODULE__.Child
 [error] does not support characters outside ASCII
 =====================================

-Modこ
 Ólá
 Olá

 ---

 (source
-  (alias)
  (ERROR
-    (identifier))
-  (ERROR
-    (identifier))
-  (ERROR
-    (identifier)))
+    (atom_literal)
+    (atom_literal)))
--- a/test/corpus/term/atom.txt
+++ b/test/corpus/term/atom.txt
@ -26,12 +26,12 @@ simple literal
 operators
 =====================================

-[:~~~, :~>>, :~>, :|||, :||, :|>, :|, :>>>, :>=, :>, :=~, :===, :==, :=, :<~>, :<~, :<|>, :<>, :<=, :<<~, :<<<, :<-, :<, :+++, :++, :+, :^^^, :^, :&&&, :&&, :&, :\\, :/, :*, :@, :.., :., :!==, :!=, :!, :::, :->, :---, :--, :-]
+[:~~~, :~>>, :~>, :|||, :||, :|>, :|, :>>>, :>=, :>, :=~, :===, :==, :=, :<~>, :<~, :<|>, :<>, :<=, :<<~, :<<<, :<-, :<, :+++, :++, :+, :^^^, :^, :&&&, :&&, :&, :\\, :/, :**, :*, :@, :.., :., :!==, :!=, :!, :::, :->, :---, :--, :-]

 ---

 (source
-  (list (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal))))
+  (list (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal)) (atom (atom_literal))))

 =====================================
 special operator-like atoms
--- a/test/corpus/term/bitstring.txt
+++ b/test/corpus/term/bitstring.txt
@ -80,33 +80,38 @@ multiple modifiers
        (string_content))
      (binary_operator
        (identifier)
-        (identifier)))
+        (identifier))))
+  (bitstring
    (binary_operator
      (string
        (string_content))
      (binary_operator
        (identifier)
-        (identifier)))
+        (identifier))))
+  (bitstring
    (binary_operator
      (string
        (string_content))
      (binary_operator
        (identifier)
-        (identifier)))
+        (identifier))))
+  (bitstring
    (binary_operator
      (integer)
      (binary_operator
        (binary_operator
          (integer)
          (identifier))
-        (identifier)))
+        (identifier))))
+  (bitstring
    (binary_operator
      (integer)
      (binary_operator
        (binary_operator
          (identifier)
          (identifier))
-        (identifier)))
+        (identifier))))
+  (bitstring
    (binary_operator
      (float)
      (binary_operator
@ -143,6 +148,7 @@ multiple components with modifiers
      (binary_operator
        (identifier)
        (call
+          (identifier)
          (arguments
            (identifier)))))))

@ -169,6 +175,7 @@ spacing
      (binary_operator
        (identifier)
        (call
+          (identifier)
          (arguments
            (identifier)))))))

--- a/test/corpus/term/charlist.txt
+++ b/test/corpus/term/charlist.txt
@ -67,7 +67,7 @@ nested interpolation
      (charlist
        (string_content)
        (interpolation
-          (identifier))))
+          (integer))))
    (string_content)))

 =====================================
@ -167,7 +167,8 @@ this is #{
      (charlist
        (string_content)
        (interpolation
-          (identifier))))
+          (integer))
+        (string_content)))
    (string_content)))

 =====================================
@ -186,9 +187,13 @@ heredoc / escaped delimiter

 (source
  (charlist
-    (escape_sequence)
    (string_content)
+    (escape_sequence)
+    (string_content))
  (charlist
+    (string_content)
+    (escape_sequence)
+    (escape_sequence)
    (escape_sequence)
    (string_content)))

@ -204,5 +209,6 @@ heredoc / escaped interpolation

 (source
  (charlist
+    (string_content)
    (escape_sequence)
    (string_content)))
--- a/test/corpus/term/integer.txt
+++ b/test/corpus/term/integer.txt
@ -14,7 +14,6 @@ decimal
  (unary_operator
    (integer))
  (integer)
-  (integer)
  (integer))

 =====================================
--- a/test/corpus/term/keyword_list.txt
+++ b/test/corpus/term/keyword_list.txt
@ -2,13 +2,25 @@
 simple literal
 =====================================

-[a_b@12?: 1, A_B@12!: 2]
+[a: 1, a_b@12?: 2, A_B@12!: 3, Mod: 4, __struct__: 5]

 ---

 (source
  (list
    (keywords
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
      (pair
        (keyword
          (atom_literal))
@ -83,6 +95,73 @@ operator key
          (atom_literal))
        (integer)))))

+=====================================
+special atom key
+=====================================
+
+[...: 1, %{}: 2, {}: 3, %: 4, <<>>: 5, ..//: 6]
+
+---
+
+(source
+  (list
+    (keywords
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer)))))
+
+=====================================
+reserved token key
+=====================================
+
+[not: 1, and: 2]
+[nil: 1, true: 2]
+
+---
+
+(source
+  (list
+    (keywords
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))))
+  (list
+    (keywords
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer)))))
+
 =====================================
 quoted key
 =====================================
@ -141,18 +220,21 @@ key interpolation
 [error] with trailing items
 =====================================

-[a: 1, b: 2, 1]
+[a: 1, b: 2, 1 => 1]

 ---

 (source
  (list
+    (keywords
+      (pair
+        (keyword
+          (atom_literal))
+        (integer))
+      (pair
+        (keyword
+          (atom_literal))
+        (integer)))
    (ERROR
-      (keywords
-        (pair
-          (keyword
-            (atom_literal)))
-        (pair
-          (keyword
-            (atom_literal)))))
-    (integer)))
+      (integer)
+      (integer))))
--- a/test/corpus/term/list.txt
+++ b/test/corpus/term/list.txt
@ -3,6 +3,8 @@ simple literal
 =====================================

 []
+[a]
+[A]
 [1]
 [1, 2]
 [1,2]
@ -12,6 +14,10 @@ simple literal

 (source
  (list)
+  (list
+    (identifier))
+  (list
+    (alias))
  (list
    (integer))
  (list
@ -64,8 +70,8 @@ trailing separator

 (source
  (list
-    (ERROR
-      (integer))))
+    (ERROR)
+    (integer)))

 =====================================
 [error] missing separator
--- a/test/corpus/term/map.txt
+++ b/test/corpus/term/map.txt
@ -126,31 +126,32 @@ update syntax
        (binary_operator
          (string
            (string_content))
-          (integer))))))
+          (string
+            (string_content)))))))

 =====================================
 [error] ordering
 =====================================

-%{b: 2, c: 3, "a" => 1}
+%{b: 2, c: 3, 1 => 1}

 ---

 (source
  (map
-    (map_content)
-      (ERROR
-        (keywords
-          (pair
-            (keyword
-              (atom_literal)))
-          (pair
-            (keyword
-              (atom_literal)))))
-      (binary_operator
-        (string
-          (string_content))
-        (integer))))
+    (map_content
+      (keywords
+        (pair
+          (keyword
+            (atom_literal))
+          (integer))
+        (pair
+          (keyword
+            (atom_literal))
+          (integer))))
+    (ERROR
+      (integer)
+      (integer))))

 =====================================
 [error] missing separator
@ -162,40 +163,12 @@ update syntax

 (source
  (map
-    (map_content)
-      (ERROR
-        (binary_operator
-          (string
-            (string_content))
-          (integer)))
+    (map_content
      (binary_operator
        (string
          (string_content))
-        (integer))))
-
-=====================================
-[error] invalid content
-=====================================
-
-%{1}
-%{1, 1}
-%{a, [], {}}
-
---
-
-(source
-  (map
-    (map_content
-      (ERROR
-        (integer))))
-  (map
-    (map_content
-      (ERROR
-        (integer)
-        (integer))))
-  (map
-    (map_content
-      (ERROR
-        (identifier)
-        (list)
-        (tuple)))))
+        (ERROR (integer))
+        (binary_operator
+          (string
+            (string_content))
+          (integer))))))
--- a/test/corpus/term/string.txt
+++ b/test/corpus/term/string.txt
@ -67,7 +67,7 @@ nested interpolation
      (string
        (string_content)
        (interpolation
-          (identifier))))
+          (integer))))
    (string_content)))

 =====================================
@ -167,7 +167,8 @@ this is #{
      (string
        (string_content)
        (interpolation
-          (identifier))))
+          (integer))
+        (string_content)))
    (string_content)))

 =====================================
@ -186,9 +187,13 @@ heredoc / escaped delimiter

 (source
  (string
-    (escape_sequence)
    (string_content)
+    (escape_sequence)
+    (string_content))
  (string
+    (string_content)
+    (escape_sequence)
+    (escape_sequence)
    (escape_sequence)
    (string_content)))

@ -204,6 +209,7 @@ heredoc / escaped interpolation

 (source
  (string
+    (string_content)
    (escape_sequence)
    (string_content)))

--- a/test/corpus/term/struct.txt
+++ b/test/corpus/term/struct.txt
@ -139,7 +139,8 @@ update syntax
        (binary_operator
          (string
            (string_content))
-          (integer))))))
+          (string
+            (string_content)))))))

 =====================================
 unused struct identifier
@ -199,3 +200,47 @@ with special identifier
      (dot
        (special_identifier)
        (alias)))))
+
+=====================================
+with atom
+=====================================
+
+%:"Elixir.Mod"{}
+
+---
+
+(source
+  (map
+    (struct
+      (atom
+        (string_content)))))
+
+=====================================
+with call
+=====================================
+
+%fun(){}
+%Mod.fun(){}
+%fun.(){}
+
+---
+
+(source
+  (map
+    (struct
+      (call
+        (identifier)
+        (arguments))))
+  (map
+    (struct
+      (call
+        (dot
+          (alias)
+          (identifier))
+        (arguments))))
+  (map
+    (struct
+      (call
+        (dot
+          (identifier))
+        (arguments)))))
--- a/test/corpus/term/tuple.txt
+++ b/test/corpus/term/tuple.txt
@ -64,8 +64,8 @@ trailing separator

 (source
  (tuple
-    (ERROR
-      (integer))))
+    (ERROR)
+    (integer)))

 =====================================
 [error] missing separator
--- a/test/corpus/variable.txt
+++ b/test/corpus/variable.txt
@ -7,7 +7,6 @@ camelCase
 az_AZ_19
 bang!
 question?
-__TEST__
 doctest
 not1
 notfalse
@ -22,7 +21,6 @@ notfalse
  (identifier)
  (identifier)
  (identifier)
-  (identifier)
  (identifier))

 =====================================
@ -31,10 +29,12 @@ unused

 _
 _number
+__TEST__

 ---

 (source
+  (unused_identifier)
  (unused_identifier)
  (unused_identifier))