tree-sitter-html/src/scanner.cc

#include <tree_sitter/parser.h>
#include <algorithm>
#include <vector>
#include <string>
#include <cwctype>
#include <cstring>
#include "tag.h"

namespace {

using std::vector;
using std::string;

// Kinds of token this external scanner can produce. Each value is written to
// `lexer->result_symbol` and is also used as an index into the `valid_symbols`
// array handed to `Scanner::scan`.
// NOTE(review): the order presumably has to mirror the grammar's externals
// list -- confirm before reordering or inserting entries.
enum TokenType {
  START_TAG_NAME,              // name of an opening tag other than script/style
  SCRIPT_START_TAG_NAME,       // name of an opening tag whose type is SCRIPT
  STYLE_START_TAG_NAME,        // name of an opening tag whose type is STYLE
  END_TAG_NAME,                // closing tag name matching the innermost open tag
  ERRONEOUS_END_TAG_NAME,      // closing tag name that does not match the innermost open tag
  SELF_CLOSING_TAG_DELIMITER,  // the `/>` sequence
  IMPLICIT_END_TAG,            // zero-width token emitted when a tag is popped without its own end tag
  RAW_TEXT,                    // content scanned up to a `</script` or `</style` delimiter
  COMMENT                      // `<!-- ... -->`
};

struct Scanner {
  // Creates a scanner whose tag stack starts out empty.
  Scanner() {}

  unsigned serialize(char *buffer) {
    uint16_t tag_count = tags.size() > UINT16_MAX ? UINT16_MAX : tags.size();
    uint16_t serialized_tag_count = 0;

    unsigned i = sizeof(tag_count);
    std::memcpy(&buffer[i], &tag_count, sizeof(tag_count));
    i += sizeof(tag_count);

    for (; serialized_tag_count < tag_count; serialized_tag_count++) {
      Tag &tag = tags[serialized_tag_count];
      if (tag.type == CUSTOM) {
        unsigned name_length = tag.custom_tag_name.size();
        if (name_length > UINT8_MAX) name_length = UINT8_MAX;
        if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;
        buffer[i++] = static_cast<char>(tag.type);
        buffer[i++] = name_length;
        tag.custom_tag_name.copy(&buffer[i], name_length);
        i += name_length;
      } else {
        if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;
        buffer[i++] = static_cast<char>(tag.type);
      }
    }

    std::memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count));
    return i;
  }

  // Rebuilds the tag stack from a buffer previously produced by `serialize`.
  //
  // Layout read: a uint16 count of tags whose data is present, a uint16 total
  // stack depth, then for each present tag one type byte, followed for CUSTOM
  // tags by a one-byte name length and that many name bytes. The stack is
  // resized to the total depth; entries past the present count keep their
  // value-initialized state. A `length` of zero simply clears the stack.
  void deserialize(const char *buffer, unsigned length) {
    tags.clear();
    if (length == 0) return;

    unsigned i = 0;
    uint16_t tag_count, serialized_tag_count;

    std::memcpy(&serialized_tag_count, &buffer[i], sizeof(serialized_tag_count));
    i += sizeof(serialized_tag_count);

    std::memcpy(&tag_count, &buffer[i], sizeof(tag_count));
    i += sizeof(tag_count);

    tags.resize(tag_count);
    for (unsigned j = 0; j < serialized_tag_count; j++) {
      Tag &tag = tags[j];
      tag.type = static_cast<TagType>(buffer[i++]);
      if (tag.type == CUSTOM) {
        // The length was stored as a single unsigned byte (0-255). Read it
        // through uint8_t: where plain `char` is signed, converting the char
        // directly would sign-extend lengths above 127 into a huge value and
        // make the assign below read far beyond the stored name.
        uint16_t name_length = static_cast<uint8_t>(buffer[i++]);
        tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]);
        i += name_length;
      }
    }
  }

  // Consumes a run of tag-name characters (alphanumerics, '-' and ':') and
  // returns them upper-cased. The result is empty when the lookahead is not
  // a tag-name character, in which case nothing is consumed.
  string scan_tag_name(TSLexer *lexer) {
    string name;
    for (;;) {
      const bool is_name_char = iswalnum(lexer->lookahead) ||
                                lexer->lookahead == '-' ||
                                lexer->lookahead == ':';
      if (!is_name_char) break;
      name += towupper(lexer->lookahead);
      lexer->advance(lexer, false);
    }
    return name;
  }

  // Scans the rest of a comment; the caller has already consumed `<!`.
  //
  // The next two characters must be `--`. Input is then consumed until a `>`
  // that directly follows at least two dashes; the token end is marked just
  // past that `>` and COMMENT is reported. Returns false when the `--` opener
  // is missing or the input ends before the comment is closed.
  bool scan_comment(TSLexer *lexer) {
    // Consume the two dashes that open the comment.
    for (int opener = 0; opener < 2; ++opener) {
      if (lexer->lookahead != '-') return false;
      lexer->advance(lexer, false);
    }

    // Number of consecutive dashes seen immediately before the lookahead.
    unsigned trailing_dashes = 0;
    for (; lexer->lookahead; lexer->advance(lexer, false)) {
      if (lexer->lookahead == '-') {
        ++trailing_dashes;
      } else if (lexer->lookahead == '>' && trailing_dashes >= 2) {
        lexer->result_symbol = COMMENT;
        lexer->advance(lexer, false);
        lexer->mark_end(lexer);
        return true;
      } else {
        // Any other character (including a `>` without `--`) breaks the run.
        trailing_dashes = 0;
      }
    }
    return false;
  }

  // Scans the raw text content of the innermost open tag.
  //
  // Consumes input up to, but not including, the end delimiter: `</script`
  // when the innermost tag is SCRIPT, `</style` otherwise. The delimiter is
  // matched ASCII case-insensitively, since end tag names are not case
  // sensitive in HTML (`</SCRIPT>` closes a script element). The token end is
  // left just before the delimiter, or before a trailing partial delimiter if
  // the input ends in the middle of one. Returns false only when no tag is
  // open; otherwise reports RAW_TEXT (possibly zero-width).
  bool scan_raw_text(TSLexer *lexer) {
    if (tags.empty()) return false;

    lexer->mark_end(lexer);

    const string end_delimiter = tags.back().type == SCRIPT
      ? "</script"
      : "</style";

    unsigned delimiter_index = 0;
    while (lexer->lookahead) {
      // Fold ASCII upper case only, so no non-ASCII character can ever be
      // mistaken for a letter of the delimiter.
      int current = lexer->lookahead;
      if (current >= 'A' && current <= 'Z') current += 'a' - 'A';

      if (current == end_delimiter[delimiter_index]) {
        delimiter_index++;
        if (delimiter_index == end_delimiter.size()) break;
        lexer->advance(lexer, false);
      } else if (delimiter_index > 0) {
        // A partial match fell through. Everything consumed so far is raw
        // text, but the current character has not been tested as the start
        // of a new delimiter yet (e.g. the second `<` in `<</script`), so
        // restart matching here without consuming it.
        delimiter_index = 0;
        lexer->mark_end(lexer);
      } else {
        lexer->advance(lexer, false);
        lexer->mark_end(lexer);
      }
    }

    lexer->result_symbol = RAW_TEXT;
    return true;
  }

  // Decides whether the innermost open tag must be closed implicitly before
  // the upcoming tag (or end of input) can be handled.
  //
  // Called with the lookahead just past a `<`, or at end of input. When an
  // implicit close is needed, the innermost tag is popped, IMPLICIT_END_TAG is
  // reported and true is returned. Returns false when no implicit close
  // applies.
  bool scan_implicit_end_tag(TSLexer *lexer) {
    Tag *innermost = NULL;
    if (!tags.empty()) innermost = &tags.back();

    const bool closing = lexer->lookahead == '/';
    if (closing) {
      lexer->advance(lexer, false);
    } else if (innermost && innermost->is_void()) {
      // A void element never has content, so it ends right here.
      tags.pop_back();
      lexer->result_symbol = IMPLICIT_END_TAG;
      return true;
    }

    string name = scan_tag_name(lexer);
    if (name.empty()) return false;

    Tag upcoming = Tag::for_name(name);

    bool must_close;
    if (closing) {
      // The end tag matches the innermost element: nothing implicit to do.
      if (!tags.empty() && tags.back() == upcoming) return false;

      // Otherwise, if the end tag matches an element further down the stack,
      // close the elements above it one at a time (lenient handling of
      // malformed HTML).
      must_close = std::find(tags.begin(), tags.end(), upcoming) != tags.end();
    } else {
      // A start tag that the innermost element cannot contain closes it.
      must_close = innermost && !innermost->can_contain(upcoming);
    }

    if (!must_close) return false;

    tags.pop_back();
    lexer->result_symbol = IMPLICIT_END_TAG;
    return true;
  }

  // Scans the name of an opening tag and pushes the tag onto the stack.
  //
  // Reports SCRIPT_START_TAG_NAME or STYLE_START_TAG_NAME for those two tag
  // types and START_TAG_NAME for everything else. Returns false, leaving the
  // stack untouched, when no tag name is present.
  bool scan_start_tag_name(TSLexer *lexer) {
    string name = scan_tag_name(lexer);
    if (name.empty()) return false;

    Tag opened = Tag::for_name(name);
    tags.push_back(opened);

    if (opened.type == SCRIPT) {
      lexer->result_symbol = SCRIPT_START_TAG_NAME;
    } else if (opened.type == STYLE) {
      lexer->result_symbol = STYLE_START_TAG_NAME;
    } else {
      lexer->result_symbol = START_TAG_NAME;
    }
    return true;
  }

  // Scans the name of a closing tag.
  //
  // When the name matches the innermost open tag, that tag is popped and
  // END_TAG_NAME is reported; otherwise the stack is left alone and
  // ERRONEOUS_END_TAG_NAME is reported. Returns false when no tag name is
  // present.
  bool scan_end_tag_name(TSLexer *lexer) {
    string name = scan_tag_name(lexer);
    if (name.empty()) return false;

    Tag closing = Tag::for_name(name);
    const bool closes_innermost = !tags.empty() && tags.back() == closing;
    if (closes_innermost) tags.pop_back();

    lexer->result_symbol = closes_innermost ? END_TAG_NAME : ERRONEOUS_END_TAG_NAME;
    return true;
  }

  // Scans the `/>` that ends a self-closing tag; the lookahead is the `/`.
  //
  // On a match, the innermost open tag (if any) is popped and
  // SELF_CLOSING_TAG_DELIMITER is reported. Returns false when the `/` is not
  // followed by `>`.
  bool scan_self_closing_tag_delimiter(TSLexer *lexer) {
    lexer->advance(lexer, false);
    if (lexer->lookahead != '>') return false;
    lexer->advance(lexer, false);

    // Guard the pop: the stack can be empty here.
    if (!tags.empty()) tags.pop_back();

    // Always name the token when reporting success. Returning true without
    // assigning result_symbol would leave whatever value it held before this
    // call, so the consumed `/>` could be reported as an unrelated token.
    lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;
    return true;
  }

  // Entry point of the scanner: tries to recognize one external token.
  //
  // `valid_symbols` is indexed by TokenType and says which tokens are
  // acceptable at this position. Leading whitespace is skipped first. Returns
  // true when a token was recognized (its kind is in `lexer->result_symbol`).
  bool scan(TSLexer *lexer, const bool *valid_symbols) {
    for (; iswspace(lexer->lookahead); ) {
      lexer->advance(lexer, true);
    }

    // Raw text is only attempted where a tag name could not appear instead.
    const bool tag_name_expected =
      valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME];
    if (valid_symbols[RAW_TEXT] && !tag_name_expected) {
      return scan_raw_text(lexer);
    }

    if (lexer->lookahead == '<') {
      // Mark the end before the `<` so that an implicit end tag is zero-width;
      // scan_comment moves the mark again once a full comment is consumed.
      lexer->mark_end(lexer);
      lexer->advance(lexer, false);

      if (lexer->lookahead == '!') {
        lexer->advance(lexer, false);
        return scan_comment(lexer);
      }
      return valid_symbols[IMPLICIT_END_TAG] && scan_implicit_end_tag(lexer);
    }

    if (lexer->lookahead == '\0') {
      // End of input: open tags may still need an implicit close.
      return valid_symbols[IMPLICIT_END_TAG] && scan_implicit_end_tag(lexer);
    }

    if (lexer->lookahead == '/') {
      return valid_symbols[SELF_CLOSING_TAG_DELIMITER] &&
             scan_self_closing_tag_delimiter(lexer);
    }

    // Anything else can only be a tag name; a start tag name takes priority.
    if (valid_symbols[START_TAG_NAME]) return scan_start_tag_name(lexer);
    if (valid_symbols[END_TAG_NAME]) return scan_end_tag_name(lexer);
    return false;
  }

  // Stack of currently open tags, innermost last. Start tag names push onto
  // it; matching end tag names, implicit end tags and self-closing delimiters
  // pop from it. This is the only state that serialize/deserialize persist.
  vector<Tag> tags;
};

}

extern "C" {

void *tree_sitter_html_external_scanner_create() {
  return new Scanner();
}

bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer,
                                            const bool *valid_symbols) {
  Scanner *scanner = static_cast<Scanner *>(payload);
  return scanner->scan(lexer, valid_symbols);
}

unsigned tree_sitter_html_external_scanner_serialize(void *payload, char *buffer) {
  Scanner *scanner = static_cast<Scanner *>(payload);
  return scanner->serialize(buffer);
}

void tree_sitter_html_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
  Scanner *scanner = static_cast<Scanner *>(payload);
  scanner->deserialize(buffer, length);
}

// Destroys the Scanner stored in `payload`, which was allocated by
// tree_sitter_html_external_scanner_create.
void tree_sitter_html_external_scanner_destroy(void *payload) {
  delete static_cast<Scanner *>(payload);
}

}
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`#include <tree_sitter/parser.h>`
			`#include <algorithm>`
			`#include <vector>`
			`#include <string>`
Use toupper, iswspace from cwctype header 2018-06-12 00:12:34 +00:00			`#include <cwctype>`
Use std::memcpy from <cstring> 2018-07-18 18:30:49 +00:00			`#include <cstring>`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`#include "tag.h"`

			`namespace {`

			`using std::vector;`
			`using std::string;`

			`enum TokenType {`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`START_TAG_NAME,`
Make distinct nodes for style/script, set up injection 2018-12-15 01:43:41 +00:00			`SCRIPT_START_TAG_NAME,`
			`STYLE_START_TAG_NAME,`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`END_TAG_NAME,`
			`ERRONEOUS_END_TAG_NAME,`
			`SELF_CLOSING_TAG_DELIMITER,`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`IMPLICIT_END_TAG,`
Add raw text elements and doctypes Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-11 23:56:33 +00:00			`RAW_TEXT,`
			`COMMENT`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`};`

			`struct Scanner {`
			`Scanner() {}`

			`unsigned serialize(char *buffer) {`
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`uint16_t tag_count = tags.size() > UINT16_MAX ? UINT16_MAX : tags.size();`
			`uint16_t serialized_tag_count = 0;`

Serialize as many tags as possible 2018-08-07 20:38:54 +00:00			`unsigned i = sizeof(tag_count);`
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`std::memcpy(&buffer[i], &tag_count, sizeof(tag_count));`
			`i += sizeof(tag_count);`
Serialize as many tags as possible 2018-08-07 20:38:54 +00:00
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`for (; serialized_tag_count < tag_count; serialized_tag_count++) {`
			`Tag &tag = tags[serialized_tag_count];`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`if (tag.type == CUSTOM) {`
Include tag stack size when serializing external scanner 2018-06-18 17:04:46 +00:00			`unsigned name_length = tag.custom_tag_name.size();`
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`if (name_length > UINT8_MAX) name_length = UINT8_MAX;`
Serialize as many tags as possible 2018-08-07 20:38:54 +00:00			`if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;`
Fix memory errors for deeply-nested HTML 2018-07-18 18:24:15 +00:00			`buffer[i++] = static_cast<char>(tag.type);`
Include tag stack size when serializing external scanner 2018-06-18 17:04:46 +00:00			`buffer[i++] = name_length;`
			`tag.custom_tag_name.copy(&buffer[i], name_length);`
			`i += name_length;`
Fix memory errors for deeply-nested HTML 2018-07-18 18:24:15 +00:00			`} else {`
Serialize as many tags as possible 2018-08-07 20:38:54 +00:00			`if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;`
Fix memory errors for deeply-nested HTML 2018-07-18 18:24:15 +00:00			`buffer[i++] = static_cast<char>(tag.type);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`
			`}`
Serialize as many tags as possible 2018-08-07 20:38:54 +00:00
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`std::memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count));`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`return i;`
			`}`

			`void deserialize(const char *buffer, unsigned length) {`
			`tags.clear();`
Include tag stack size when serializing external scanner 2018-06-18 17:04:46 +00:00			`if (length > 0) {`
			`unsigned i = 0;`
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`uint16_t tag_count, serialized_tag_count;`

			`std::memcpy(&serialized_tag_count, &buffer[i], sizeof(serialized_tag_count));`
			`i += sizeof(serialized_tag_count);`

			`std::memcpy(&tag_count, &buffer[i], sizeof(tag_count));`
			`i += sizeof(tag_count);`

			`tags.resize(tag_count);`
			`for (unsigned j = 0; j < serialized_tag_count; j++) {`
Include tag stack size when serializing external scanner 2018-06-18 17:04:46 +00:00			`Tag &tag = tags[j];`
			`tag.type = static_cast<TagType>(buffer[i++]);`
			`if (tag.type == CUSTOM) {`
Always serialize the full depth of the tag stack 2018-08-29 18:01:00 +00:00			`uint16_t name_length = (uint16_t)buffer[i++];`
Include tag stack size when serializing external scanner 2018-06-18 17:04:46 +00:00			`tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]);`
			`i += name_length;`
			`}`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`
			`}`
			`}`

			`string scan_tag_name(TSLexer *lexer) {`
			`string tag_name;`
Include tag stack size when serializing external scanner 2018-06-18 17:04:46 +00:00			`while (iswalnum(lexer->lookahead) \|\|`
			`lexer->lookahead == '-' \|\|`
			`lexer->lookahead == ':') {`
Use toupper, iswspace from cwctype header 2018-06-12 00:12:34 +00:00			`tag_name += towupper(lexer->lookahead);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`lexer->advance(lexer, false);`
			`}`
			`return tag_name;`
			`}`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool scan_comment(TSLexer *lexer) {`
Add comments, allow slashes in unquoted attribute values Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:36:18 +00:00			`if (lexer->lookahead != '-') return false;`
			`lexer->advance(lexer, false);`
			`if (lexer->lookahead != '-') return false;`
			`lexer->advance(lexer, false);`

			`unsigned dashes = 0;`
Use explicit types instead of auto 2018-06-15 22:32:21 +00:00			`while (lexer->lookahead) {`
			`switch (lexer->lookahead) {`
Add comments, allow slashes in unquoted attribute values Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:36:18 +00:00			`case '-':`
			`++dashes;`
			`break;`
			`case '>':`
			`if (dashes >= 2) {`
			`lexer->result_symbol = COMMENT;`
			`lexer->advance(lexer, false);`
			`lexer->mark_end(lexer);`
			`return true;`
			`}`
			`default:`
			`dashes = 0;`
			`}`
			`lexer->advance(lexer, false);`
			`}`
			`return false;`
			`}`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool scan_raw_text(TSLexer *lexer) {`
Add raw text elements and doctypes Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-11 23:56:33 +00:00			`if (!tags.size()) return false;`

			`lexer->mark_end(lexer);`

			`const string &end_delimiter = tags.back().type == SCRIPT`
			`? "</script"`
			`: "</style";`

			`unsigned delimiter_index = 0;`
			`while (lexer->lookahead) {`
			`if (lexer->lookahead == end_delimiter[delimiter_index]) {`
			`delimiter_index++;`
			`if (delimiter_index == end_delimiter.size()) break;`
Fix range of script tag content 2018-06-29 03:32:27 +00:00			`lexer->advance(lexer, false);`
Add raw text elements and doctypes Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-11 23:56:33 +00:00			`} else {`
			`delimiter_index = 0;`
Fix range of script tag content 2018-06-29 03:32:27 +00:00			`lexer->advance(lexer, false);`
Add raw text elements and doctypes Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-11 23:56:33 +00:00			`lexer->mark_end(lexer);`
			`}`
			`}`

			`lexer->result_symbol = RAW_TEXT;`
			`return true;`
			`}`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool scan_implicit_end_tag(TSLexer *lexer) {`
Use NULL instead of nullptr 2018-06-18 17:17:38 +00:00			`Tag *parent = tags.empty() ? NULL : &tags.back();`
Handle elements with optional end tags (li, p, etc) Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-12 17:51:03 +00:00
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool is_closing_tag = false;`
			`if (lexer->lookahead == '/') {`
			`is_closing_tag = true;`
			`lexer->advance(lexer, false);`
			`} else {`
			`if (parent && parent->is_void()) {`
			`tags.pop_back();`
			`lexer->result_symbol = IMPLICIT_END_TAG;`
			`return true;`
			`}`
Add comments, allow slashes in unquoted attribute values Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:36:18 +00:00			`}`

Use explicit types instead of auto 2018-06-15 22:32:21 +00:00			`string tag_name = scan_tag_name(lexer);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`if (tag_name.empty()) return false;`

Handle elements with optional end tags (li, p, etc) Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-12 17:51:03 +00:00			`Tag next_tag = Tag::for_name(tag_name);`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`if (is_closing_tag) {`
			`// The tag correctly closes the topmost element on the stack`
Fix crash on unexpected closing tag after error 2018-06-15 22:35:50 +00:00			`if (!tags.empty() && tags.back() == next_tag) return false;`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00
			`// Otherwise, dig deeper and queue implicit end tags (to be nice in`
			`// the case of malformed HTML)`
			`if (std::find(tags.begin(), tags.end(), next_tag) != tags.end()) {`
			`tags.pop_back();`
			`lexer->result_symbol = IMPLICIT_END_TAG;`
			`return true;`
			`}`
			`} else if (parent && !parent->can_contain(next_tag)) {`
Handle elements with optional end tags (li, p, etc) Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-12 17:51:03 +00:00			`tags.pop_back();`
			`lexer->result_symbol = IMPLICIT_END_TAG;`
			`return true;`
			`}`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`return false;`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool scan_start_tag_name(TSLexer *lexer) {`
Use explicit types instead of auto 2018-06-15 22:32:21 +00:00			`string tag_name = scan_tag_name(lexer);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`if (tag_name.empty()) return false;`
			`Tag tag = Tag::for_name(tag_name);`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`tags.push_back(tag);`
Make distinct nodes for style/script, set up injection 2018-12-15 01:43:41 +00:00			`switch (tag.type) {`
			`case SCRIPT:`
			`lexer->result_symbol = SCRIPT_START_TAG_NAME;`
			`break;`
			`case STYLE:`
			`lexer->result_symbol = STYLE_START_TAG_NAME;`
			`break;`
			`default:`
			`lexer->result_symbol = START_TAG_NAME;`
			`break;`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`}`
			`return true;`
			`}`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool scan_end_tag_name(TSLexer *lexer) {`
Use explicit types instead of auto 2018-06-15 22:32:21 +00:00			`string tag_name = scan_tag_name(lexer);`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`if (tag_name.empty()) return false;`
			`Tag tag = Tag::for_name(tag_name);`
			`if (!tags.empty() && tags.back() == tag) {`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`tags.pop_back();`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`lexer->result_symbol = END_TAG_NAME;`
			`} else {`
			`lexer->result_symbol = ERRONEOUS_END_TAG_NAME;`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`return true;`
			`}`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`bool scan_self_closing_tag_delimiter(TSLexer *lexer) {`
			`lexer->advance(lexer, false);`
			`if (lexer->lookahead == '>') {`
			`lexer->advance(lexer, false);`
Fixed error where empty vector was being popped from Co-authored-by: Rahul Zhade <zhade3@github.com> 2018-08-07 18:18:18 +00:00			`if (!tags.empty()) {`
			`tags.pop_back();`
			`lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;`
			`}`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`return true;`
			`}`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`return false;`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`

			`bool scan(TSLexer lexer, const bool valid_symbols) {`
			`while (iswspace(lexer->lookahead)) {`
			`lexer->advance(lexer, true);`
			`}`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) {`
			`return scan_raw_text(lexer);`
Add raw text elements and doctypes Co-Authored-By: Ashi Krishnan <queerviolet@github.com> 2018-06-11 23:56:33 +00:00			`}`

Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`switch (lexer->lookahead) {`
			`case '<':`
Add comments, allow slashes in unquoted attribute values Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:36:18 +00:00			`lexer->mark_end(lexer);`
			`lexer->advance(lexer, false);`

			`if (lexer->lookahead == '!') {`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`lexer->advance(lexer, false);`
Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`return scan_comment(lexer);`
Add comments, allow slashes in unquoted attribute values Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:36:18 +00:00			`}`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`if (valid_symbols[IMPLICIT_END_TAG]) {`
			`return scan_implicit_end_tag(lexer);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`
			`break;`

Handle void tags at EOF 2018-06-12 21:06:16 +00:00			`case '\0':`
			`if (valid_symbols[IMPLICIT_END_TAG]) {`
			`return scan_implicit_end_tag(lexer);`
			`}`
			`break;`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`case '/':`
			`if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) {`
			`return scan_self_closing_tag_delimiter(lexer);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`
			`break;`

Parse tag names as separate tokens 2018-06-12 19:20:13 +00:00			`default:`
			`if (valid_symbols[START_TAG_NAME] \|\| valid_symbols[END_TAG_NAME]) {`
			`return valid_symbols[START_TAG_NAME]`
			`? scan_start_tag_name(lexer)`
			`: scan_end_tag_name(lexer);`
Start tracking element nesting in external scanner Co-Authored-By: Ashi Krishan <queerviolet@github.com> 2018-06-11 22:12:01 +00:00			`}`
			`}`

			`return false;`
			`}`

			`vector<Tag> tags;`
			`};`

			`}`

			`extern "C" {`

			`void *tree_sitter_html_external_scanner_create() {`
			`return new Scanner();`
			`}`

			`bool tree_sitter_html_external_scanner_scan(void payload, TSLexer lexer,`
			`const bool *valid_symbols) {`
			`Scanner scanner = static_cast<Scanner >(payload);`
			`return scanner->scan(lexer, valid_symbols);`
			`}`

			`unsigned tree_sitter_html_external_scanner_serialize(void payload, char buffer) {`
			`Scanner scanner = static_cast<Scanner >(payload);`
			`return scanner->serialize(buffer);`
			`}`

			`void tree_sitter_html_external_scanner_deserialize(void payload, const char buffer, unsigned length) {`
			`Scanner scanner = static_cast<Scanner >(payload);`
			`scanner->deserialize(buffer, length);`
			`}`

			`void tree_sitter_html_external_scanner_destroy(void *payload) {`
			`Scanner scanner = static_cast<Scanner >(payload);`
			`delete scanner;`
			`}`

			`}`