From b21591e6d3b76e7b54cad989b92d505e9e84b430 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 4 Mar 2021 14:10:32 -0800 Subject: [PATCH] Bump tree-sitter-cli to 0.19 --- .gitignore | 2 + .npmignore | 2 + Cargo.toml | 25 +++++ binding.gyp | 2 +- {src => bindings/node}/binding.cc | 0 bindings/node/index.js | 19 ++++ bindings/rust/build.rs | 25 +++++ bindings/rust/lib.rs | 52 ++++++++++ index.js | 13 --- package.json | 4 +- src/grammar.json | 3 +- src/node-types.json | 4 + src/parser.c | 24 +++-- src/tree_sitter/parser.h | 158 ++++++++++++++---------------- 14 files changed, 223 insertions(+), 110 deletions(-) create mode 100644 Cargo.toml rename {src => bindings/node}/binding.cc (100%) create mode 100644 bindings/node/index.js create mode 100644 bindings/rust/build.rs create mode 100644 bindings/rust/lib.rs delete mode 100644 index.js diff --git a/.gitignore b/.gitignore index ad15b4f..8edaf8d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ node_modules build *.log package-lock.json +target +Cargo.lock diff --git a/.npmignore b/.npmignore index 3e9f164..e9c0f7e 100644 --- a/.npmignore +++ b/.npmignore @@ -2,3 +2,5 @@ corpus examples build script +target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..e65e954 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "tree-sitter-html" +description = "html grammar for the tree-sitter parsing library" +version = "0.19.0" +keywords = ["incremental", "parsing", "html"] +categories = ["parsing", "text-editors"] +repository = "https://github.com/tree-sitter/tree-sitter-html" +edition = "2018" + +build = "bindings/rust/build.rs" +include = [ + "bindings/rust/*", + "grammar.js", + "queries/*", + "src/*", +] + +[lib] +path = "bindings/rust/lib.rs" + +[dependencies] +tree-sitter = "0.19" + +[build-dependencies] +cc = "1.0" diff --git a/binding.gyp b/binding.gyp index 5ed124d..9837a89 100644 --- a/binding.gyp +++ b/binding.gyp @@ -9,7 +9,7 @@ "sources": [ "src/parser.c", "src/scanner.cc", - "src/binding.cc" + "bindings/node/binding.cc" ], "cflags_c": [ "-std=c99", diff --git a/src/binding.cc b/bindings/node/binding.cc similarity index 100% rename from src/binding.cc rename to bindings/node/binding.cc diff --git a/bindings/node/index.js b/bindings/node/index.js new file mode 100644 index 0000000..801b0d0 --- /dev/null +++ b/bindings/node/index.js @@ -0,0 +1,19 @@ +try { + module.exports = require("../../build/Release/tree_sitter_html_binding"); +} catch (error1) { + if (error1.code !== 'MODULE_NOT_FOUND') { + throw error1; + } + try { + module.exports = require("../../build/Debug/tree_sitter_html_binding"); + } catch (error2) { + if (error2.code !== 'MODULE_NOT_FOUND') { + throw error2; + } + throw error1 + } +} + +try { + module.exports.nodeTypeInfo = require("../../src/node-types.json"); +} catch (_) {} diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs new file mode 100644 index 0000000..65350f1 --- /dev/null +++ b/bindings/rust/build.rs @@ -0,0 +1,25 @@ +fn main() { + let src_dir = std::path::Path::new("src"); + + let mut c_config = cc::Build::new(); + c_config.include(&src_dir); + c_config + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-unused-but-set-variable") + .flag_if_supported("-Wno-trigraphs"); + let parser_path = src_dir.join("parser.c"); + c_config.file(&parser_path); + println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); + c_config.compile("parser"); + + let mut cpp_config = cc::Build::new(); + cpp_config.cpp(true); + cpp_config.include(&src_dir); + cpp_config + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-unused-but-set-variable"); + let scanner_path = src_dir.join("scanner.cc"); + cpp_config.file(&scanner_path); + println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + cpp_config.compile("scanner"); +} diff --git a/bindings/rust/lib.rs b/bindings/rust/lib.rs new file mode 100644 index 0000000..88b6abd --- /dev/null +++ b/bindings/rust/lib.rs @@ -0,0 +1,52 @@ +//! This crate provides html language support for the [tree-sitter][] parsing library. +//! +//! Typically, you will use the [language][language func] function to add this language to a +//! tree-sitter [Parser][], and then use the parser to parse some code: +//! +//! ``` +//! let code = ""; +//! let mut parser = tree_sitter::Parser::new(); +//! parser.set_language(tree_sitter_html::language()).expect("Error loading html grammar"); +//! let tree = parser.parse(code, None).unwrap(); +//! ``` +//! +//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html +//! [language func]: fn.language.html +//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html +//! [tree-sitter]: https://tree-sitter.github.io/ + +use tree_sitter::Language; + +extern "C" { + fn tree_sitter_html() -> Language; +} + +/// Get the tree-sitter [Language][] for this grammar. +/// +/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html +pub fn language() -> Language { + unsafe { tree_sitter_html() } +} + +/// The content of the [`node-types.json`][] file for this grammar. +/// +/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types +pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json"); + +// Uncomment these to include any queries that this grammar contains + +// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm"); +// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm"); +// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm"); +// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm"); + +#[cfg(test)] +mod tests { + #[test] + fn test_can_load_grammar() { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(super::language()) + .expect("Error loading html language"); + } +} diff --git a/index.js b/index.js deleted file mode 100644 index 4da1230..0000000 --- a/index.js +++ /dev/null @@ -1,13 +0,0 @@ -try { - module.exports = require("./build/Release/tree_sitter_html_binding"); -} catch (error) { - try { - module.exports = require("./build/Debug/tree_sitter_html_binding"); - } catch (_) { - throw error - } -} - -try { - module.exports.nodeTypeInfo = require("./src/node-types.json"); -} catch (_) {} diff --git a/package.json b/package.json index 3bf1c72..a08a5d0 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "tree-sitter-html", "version": "0.16.0", "description": "HTML grammar for tree-sitter", - "main": "index.js", + "main": "bindings/node", "keywords": [ "parser", "lexer" @@ -16,7 +16,7 @@ "nan": "^2.14.0" }, "devDependencies": { - "tree-sitter-cli": "^0.16.9" + "tree-sitter-cli": "^0.19.1" }, "scripts": { "test": "tree-sitter test && tree-sitter parse examples/*.html --quiet --time", diff --git a/src/grammar.json b/src/grammar.json index fb4c6ac..736a0fa 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -361,7 +361,7 @@ }, "attribute_name": { "type": "PATTERN", - "value": "[^<>\"'\\/=\\s]+" + "value": "[^<>\"'/=\\s]+" }, "attribute_value": { "type": "PATTERN", @@ -448,6 +448,7 @@ } ], "conflicts": [], + "precedences": [], "externals": [ { "type": "SYMBOL", diff --git a/src/node-types.json b/src/node-types.json index 65f158a..5d975ea 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -278,6 +278,10 @@ "type": "attribute_value", "named": true }, + { + "type": "comment", + "named": true + }, { "type": "doctype", "named": false diff --git a/src/parser.c b/src/parser.c index 911aef5..8da4d90 100644 --- a/src/parser.c +++ b/src/parser.c @@ -5,7 +5,7 @@ #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif -#define LANGUAGE_VERSION 11 +#define LANGUAGE_VERSION 13 #define STATE_COUNT 94 #define LARGE_STATE_COUNT 2 #define SYMBOL_COUNT 40 @@ -14,6 +14,7 @@ #define EXTERNAL_TOKEN_COUNT 9 #define FIELD_COUNT 0 #define MAX_ALIAS_SEQUENCE_LENGTH 4 +#define PRODUCTION_ID_COUNT 1 enum { anon_sym_LT_BANG = 1, @@ -306,10 +307,14 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { }, }; -static TSSymbol ts_alias_sequences[1][MAX_ALIAS_SEQUENCE_LENGTH] = { +static TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { [0] = {0}, }; +static uint16_t ts_non_terminal_alias_map[] = { + 0, +}; + static bool ts_lex(TSLexer *lexer, TSStateId state) { START_LEXER(); eof = lexer->eof(lexer); @@ -1886,20 +1891,23 @@ extern const TSLanguage *tree_sitter_html(void) { .symbol_count = SYMBOL_COUNT, .alias_count = ALIAS_COUNT, .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, .large_state_count = LARGE_STATE_COUNT, - .symbol_metadata = ts_symbol_metadata, - .parse_table = (const unsigned short *)ts_parse_table, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = (const uint16_t *)ts_parse_table, .small_parse_table = (const uint16_t *)ts_small_parse_table, .small_parse_table_map = (const uint32_t *)ts_small_parse_table_map, .parse_actions = ts_parse_actions, - .lex_modes = ts_lex_modes, .symbol_names = ts_symbol_names, + .symbol_metadata = ts_symbol_metadata, .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, .alias_sequences = (const TSSymbol *)ts_alias_sequences, - .field_count = FIELD_COUNT, - .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .lex_modes = ts_lex_modes, .lex_fn = ts_lex, - .external_token_count = EXTERNAL_TOKEN_COUNT, .external_scanner = { (const bool *)ts_external_scanner_states, ts_external_scanner_symbol_map, diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h index 11bf4fc..a3a87bd 100644 --- a/src/tree_sitter/parser.h +++ b/src/tree_sitter/parser.h @@ -13,6 +13,8 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 +typedef uint16_t TSStateId; + #ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; @@ -30,11 +32,10 @@ typedef struct { uint16_t length; } TSFieldMapSlice; -typedef uint16_t TSStateId; - typedef struct { - bool visible : 1; - bool named : 1; + bool visible; + bool named; + bool supertype; } TSSymbolMetadata; typedef struct TSLexer TSLexer; @@ -56,21 +57,21 @@ typedef enum { TSParseActionTypeRecover, } TSParseActionType; -typedef struct { - union { - struct { - TSStateId state; - bool extra : 1; - bool repetition : 1; - } shift; - struct { - TSSymbol symbol; - int16_t dynamic_precedence; - uint8_t child_count; - uint8_t production_id; - } reduce; - } params; - TSParseActionType type : 4; +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; } TSParseAction; typedef struct { @@ -82,7 +83,7 @@ typedef union { TSParseAction action; struct { uint8_t count; - bool reusable : 1; + bool reusable; } entry; } TSParseActionEntry; @@ -92,13 +93,24 @@ struct TSLanguage { uint32_t alias_count; uint32_t token_count; uint32_t external_token_count; - const char **symbol_names; - const TSSymbolMetadata *symbol_metadata; - const uint16_t *parse_table; - const TSParseActionEntry *parse_actions; - const TSLexMode *lex_modes; - const TSSymbol *alias_sequences; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char **symbol_names; + const char **field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; @@ -111,14 +123,6 @@ struct TSLanguage { unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; - uint32_t field_count; - const TSFieldMapSlice *field_map_slices; - const TSFieldMapEntry *field_map_entries; - const char **field_names; - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSSymbol *public_symbol_map; }; /* @@ -167,66 +171,50 @@ struct TSLanguage { #define ACTIONS(id) id -#define SHIFT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value \ - } \ - }, \ - .type = TSParseActionTypeShift \ - } \ - } +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} #define SHIFT_REPEAT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value, \ - .repetition = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ } \ - } - -#define RECOVER() \ - { \ - { .type = TSParseActionTypeRecover } \ - } + }} #define SHIFT_EXTRA() \ - { \ - { \ - .params = { \ - .shift = { \ - .extra = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ } \ - } + }} #define REDUCE(symbol_val, child_count_val, ...) \ - { \ - { \ - .params = { \ - .reduce = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ - }, \ - .type = TSParseActionTypeReduce \ - } \ - } + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} -#define ACCEPT_INPUT() \ - { \ - { .type = TSParseActionTypeAccept } \ - } +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} #ifdef __cplusplus }