Compare commits

..

5 Commits

Author SHA1 Message Date
Shadowfacts 2b49f95b16 tree-sitter 0.20, --abi=14 2022-05-28 10:40:53 -04:00
Matt Massicotte 29f53d8f4f
Makefile and C bindings (#38) 2022-05-06 09:01:49 -07:00
Microsoft Provenance Contributions 161a92474a
Update package.json to include the repository key (#31)
With the rise in supply chain attacks and OSS dependencies being used as an attack vector, Microsoft is working with our ecosystem partners, such as the Linux Foundation's OpenSSF, to enable OSS consumers to track packages back to their public sources.
We've identified that the following packages published to NPM do not report where sources can be found, typically accomplished by including a link to your GitHub repository in your `package.json` REPOSITORY field. This PR was created to add this value, ensuring future releases will include this provenance information.
Published NPM packages with repository information:
* tree-sitter-html
2021-08-17 11:20:56 -07:00
Santos Gallegos af9339f3de
Dont include trailing spaces in text nodes (#27)
* Don't include trailing spaces in text nodes

* Update tests
2021-07-11 11:04:28 -07:00
Max Brunsfeld d93af487cc 0.19.0 2021-03-04 14:11:18 -08:00
12 changed files with 535 additions and 316 deletions

6
.gitignore vendored
View File

@ -4,3 +4,9 @@ build
package-lock.json package-lock.json
target target
Cargo.lock Cargo.lock
*.a
*.dylib
*.so
*.o
bindings/c/*.h
bindings/c/*.pc

View File

@ -1,7 +1,7 @@
[package] [package]
name = "tree-sitter-html" name = "tree-sitter-html"
description = "html grammar for the tree-sitter parsing library" description = "html grammar for the tree-sitter parsing library"
version = "0.19.0" version = "0.20.0"
keywords = ["incremental", "parsing", "html"] keywords = ["incremental", "parsing", "html"]
categories = ["parsing", "text-editors"] categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-html" repository = "https://github.com/tree-sitter/tree-sitter-html"
@ -19,7 +19,7 @@ include = [
path = "bindings/rust/lib.rs" path = "bindings/rust/lib.rs"
[dependencies] [dependencies]
tree-sitter = "0.19" tree-sitter = "0.20"
[build-dependencies] [build-dependencies]
cc = "1.0" cc = "1.0"

99
Makefile Normal file
View File

@ -0,0 +1,99 @@
# Version written into the generated pkg-config file; kept in step with the
# package version declared in Cargo.toml and package.json.
VERSION := 0.20.0
# Repository
SRC_DIR := src
# URL of the "origin" remote as reported by git (overridable by the caller)
PARSER_REPO_URL ?= $(shell git -C $(SRC_DIR) remote get-url origin )
# Parser name: third dash-separated field of the repository basename, with a
# literal trailing ".git" removed (the dot is escaped and the match anchored so
# names that merely contain "git" are left intact).
# the # in the sed pattern has to be escaped or it will be interpreted as a comment
PARSER_NAME ?= $(shell basename $(PARSER_REPO_URL) | cut -d '-' -f3 | sed 's\#\.git$$\#\#')
# Upper-cased parser name, substituted into the header's include guard
UPPER_PARSER_NAME := $(shell echo $(PARSER_NAME) | tr a-z A-Z )
# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# C++ sources, if any; their presence decides whether -lc++ is linked
CPPSRC := $(wildcard $(SRC_DIR)/*.cc)
ADDITIONALLIBS := $(if $(CPPSRC),-lc++)
# every C and C++ source, and the object file derived from each
SRC := $(wildcard $(SRC_DIR)/*.c) $(CPPSRC)
OBJ := $(addsuffix .o,$(basename $(SRC)))
# ABI versioning
SONAME_MAJOR := 0
SONAME_MINOR := 0
# default compiler flags; replaced wholesale when the caller supplies their own
CFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
CXXFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
# flags appended unconditionally, even to caller-supplied values
override CFLAGS += -std=gnu99 -fPIC
override CXXFLAGS += -fPIC
# OS-specific bits
# LINKSHARED is assembled piecewise into one "-Wl,a,b,c" option, so every piece
# appended after "-Wl," must end with a comma except the last one.
ifeq ($(shell uname),Darwin)
  SOEXT = dylib
  SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
  SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
  LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
  ifneq ($(ADDITIONALLIBS),)
    LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
  endif
  LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/libtree-sitter-$(PARSER_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
else
  SOEXT = so
  SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
  SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
  LINKSHARED := $(LINKSHARED)-shared -Wl,
  ifneq ($(ADDITIONALLIBS),)
    # the trailing comma keeps the extra library and -soname as separate linker
    # arguments, matching the Darwin branch above
    LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
  endif
  LINKSHARED := $(LINKSHARED)-soname,libtree-sitter-$(PARSER_NAME).so.$(SONAME_MAJOR)
endif
# these BSDs keep pkg-config files under libdata instead of lib
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
  PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
# Default goal: static library, versioned shared library and the C header.
all: libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXTVER) bindings/c/$(PARSER_NAME).h

# Static archive of all parser objects.
libtree-sitter-$(PARSER_NAME).a: $(OBJ)
	$(AR) rcs $@ $^

# Versioned shared library, plus unversioned and major-version symlinks to it.
libtree-sitter-$(PARSER_NAME).$(SOEXTVER): $(OBJ)
	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
	ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXT)
	ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)

# C header generated from the template by filling in the parser name.
# The template is a prerequisite so that editing it regenerates the header.
bindings/c/$(PARSER_NAME).h: bindings/c/tree-sitter.h.in
	sed -e 's|@UPPER_PARSERNAME@|$(UPPER_PARSER_NAME)|' \
		-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
		$< > $@
# Install the libraries, the generated header and a pkg-config file under
# $(DESTDIR). The static archive is installed as a plain data file (0644);
# only the shared library keeps executable permissions.
# The pkg-config sed runs its expressions in order: absolute paths are filled
# in first, then any "=<PREFIX>" is rewritten to "=${prefix}", and only then is
# the prefix= line itself given its value.
install: all
	install -d '$(DESTDIR)$(LIBDIR)'
	install -m644 libtree-sitter-$(PARSER_NAME).a '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).a
	install -m755 libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER)
	ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)
	ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXT)
	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
	install -m644 bindings/c/$(PARSER_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
	install -d '$(DESTDIR)$(PCLIBDIR)'
	sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
		-e 's|=$(PREFIX)|=$${prefix}|' \
		-e 's|@PREFIX@|$(PREFIX)|' \
		-e 's|@ADDITIONALLIBS@|$(ADDITIONALLIBS)|' \
		-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
		-e 's|@PARSERREPOURL@|$(PARSER_REPO_URL)|' \
		bindings/c/tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter-$(PARSER_NAME).pc
# Remove every build product: objects, the static archive, the shared library
# with both of its symlinks, and the generated C header.
clean:
	rm -f $(OBJ) \
		libtree-sitter-$(PARSER_NAME).a \
		libtree-sitter-$(PARSER_NAME).$(SOEXT) \
		libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR) \
		libtree-sitter-$(PARSER_NAME).$(SOEXTVER) \
		bindings/c/$(PARSER_NAME).h

# These goals name actions, not files.
.PHONY: all install clean

View File

@ -0,0 +1,16 @@
/*
 * Header template: the placeholders between at-signs are replaced with the
 * parser name by the Makefile rule that generates the installed header.
 */
#ifndef TREE_SITTER_@UPPER_PARSERNAME@_H_
#define TREE_SITTER_@UPPER_PARSERNAME@_H_
#include <tree_sitter/parser.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Returns a pointer to the language object for this grammar.
 * Declared with (void) so that C compilers see a real prototype rather than
 * an old-style declaration with unspecified parameters.
 */
extern TSLanguage *tree_sitter_@PARSERNAME@(void);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_@UPPER_PARSERNAME@_H_

View File

@ -0,0 +1,11 @@
# pkg-config template; the placeholders between at-signs are filled in by the
# Makefile's install rule.
# Variables referenced by the keyword fields below.
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@
# Extra link libraries supplied by the build; may be empty.
additionallibs=@ADDITIONALLIBS@
# Package metadata.
Name: tree-sitter-@PARSERNAME@
Description: A tree-sitter grammar for the @PARSERNAME@ programming language.
URL: @PARSERREPOURL@
Version: @VERSION@
# Flags handed to consumers of this package.
Libs: -L${libdir} ${additionallibs} -ltree-sitter-@PARSERNAME@
Cflags: -I${includedir}

View File

@ -35,7 +35,7 @@ pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains // Uncomment these to include any queries that this grammar contains
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm"); pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm"); // pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm"); // pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm"); // pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");

View File

@ -37,13 +37,14 @@ Nested tags
<span>a</span> <span>a</span>
b b
<b>c</b> <b>c</b>
Multi-line
text
</div> </div>
--- ---
(fragment (fragment
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text) (text)
@ -102,12 +103,10 @@ Custom tags
(fragment (fragment
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (element
(start_tag (tag_name) (attribute (attribute_name))) (start_tag (tag_name) (attribute (attribute_name)))
(text) (text)
(end_tag (tag_name))) (end_tag (tag_name)))
(text)
(end_tag (tag_name)))) (end_tag (tag_name))))
================================== ==================================
@ -123,11 +122,9 @@ Comments
(fragment (fragment
(comment) (comment)
(comment) (comment)
(text)
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(comment) (comment)
(text)
(end_tag (tag_name)))) (end_tag (tag_name))))
================================== ==================================
@ -155,17 +152,14 @@ Raw text elements
(start_tag (tag_name)) (start_tag (tag_name))
(raw_text) (raw_text)
(end_tag (tag_name))) (end_tag (tag_name)))
(text)
(style_element (style_element
(start_tag (tag_name)) (start_tag (tag_name))
(raw_text) (raw_text)
(end_tag (tag_name))) (end_tag (tag_name)))
(text)
(script_element (script_element
(start_tag (tag_name)) (start_tag (tag_name))
(raw_text) (raw_text)
(end_tag (tag_name))) (end_tag (tag_name))))
(text))
================================== ==================================
All-caps doctype All-caps doctype
@ -199,7 +193,6 @@ LI elements without close tags
(fragment (fragment
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(end_tag (tag_name)))) (end_tag (tag_name))))
@ -219,7 +212,6 @@ DT and DL elements without close tags
(fragment (fragment
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
@ -240,7 +232,6 @@ P elements without close tags
(fragment (fragment
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text) (end_tag (tag_name))) (element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(text)
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))) (element (start_tag (tag_name)) (text) (end_tag (tag_name))))
@ -278,10 +269,8 @@ COLGROUP elements without end tags
(fragment (fragment
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (start_tag (element (start_tag
(tag_name) (tag_name)
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))) (attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
@ -290,15 +279,10 @@ COLGROUP elements without end tags
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))) (attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (start_tag (tag_name)) (text) (end_tag (tag_name))) (element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(text)
(element (start_tag (tag_name)) (text) (end_tag (tag_name))) (element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(text)
(element (start_tag (tag_name)) (text) (end_tag (tag_name))) (element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(text)
(end_tag (tag_name))) (end_tag (tag_name)))
(text)
(end_tag (tag_name)))) (end_tag (tag_name))))
========================================= =========================================
@ -317,15 +301,12 @@ TR, TD, and TH elements without end tags
(fragment (fragment
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))) (element (start_tag (tag_name)) (text)))
(element (element
(start_tag (tag_name)) (start_tag (tag_name))
(text)
(element (start_tag (tag_name)) (text)) (element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))) (element (start_tag (tag_name)) (text)))
(end_tag (tag_name)))) (end_tag (tag_name))))

View File

@ -120,6 +120,6 @@ module.exports = grammar({
seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"') seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"')
), ),
text: $ => /[^<>]+/ text: $ => /[^<>\s]([^<>]*[^<>\s])?/
} }
}); });

View File

@ -1,12 +1,16 @@
{ {
"name": "tree-sitter-html", "name": "tree-sitter-html",
"version": "0.16.0", "version": "0.20.0",
"description": "HTML grammar for tree-sitter", "description": "HTML grammar for tree-sitter",
"main": "bindings/node", "main": "bindings/node",
"keywords": [ "keywords": [
"parser", "parser",
"lexer" "lexer"
], ],
"repository": {
"type": "git",
"url": "https://github.com/tree-sitter/tree-sitter-html.git"
},
"authors": [ "authors": [
"Max Brunsfeld <maxbrunsfeld@gmail.com>", "Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Ashi Krishnan <queerviolet@github.com>" "Ashi Krishnan <queerviolet@github.com>"
@ -16,7 +20,7 @@
"nan": "^2.14.0" "nan": "^2.14.0"
}, },
"devDependencies": { "devDependencies": {
"tree-sitter-cli": "^0.19.1" "tree-sitter-cli": "^0.20.0"
}, },
"scripts": { "scripts": {
"test": "tree-sitter test && tree-sitter parse examples/*.html --quiet --time", "test": "tree-sitter test && tree-sitter parse examples/*.html --quiet --time",

View File

@ -434,7 +434,7 @@
}, },
"text": { "text": {
"type": "PATTERN", "type": "PATTERN",
"value": "[^<>]+" "value": "[^<>\\s]([^<>]*[^<>\\s])?"
} }
}, },
"extras": [ "extras": [

671
src/parser.c vendored

File diff suppressed because it is too large Load Diff

View File

@ -102,8 +102,8 @@ struct TSLanguage {
const uint16_t *small_parse_table; const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map; const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions; const TSParseActionEntry *parse_actions;
const char **symbol_names; const char * const *symbol_names;
const char **field_names; const char * const *field_names;
const TSFieldMapSlice *field_map_slices; const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries; const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata; const TSSymbolMetadata *symbol_metadata;
@ -123,6 +123,7 @@ struct TSLanguage {
unsigned (*serialize)(void *, char *); unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned); void (*deserialize)(void *, const char *, unsigned);
} external_scanner; } external_scanner;
const TSStateId *primary_state_ids;
}; };
/* /*