From 3e586b10fc00909035ace18aabe5ca2906914a82 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 7 Aug 2018 14:18:18 -0400 Subject: [PATCH 1/7] Fixed error where empty vector was being popped from Co-authored-by: Rahul Zhade --- src/scanner.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 196153e..31feb38 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -202,8 +202,10 @@ struct Scanner { lexer->advance(lexer, false); if (lexer->lookahead == '>') { lexer->advance(lexer, false); - tags.pop_back(); - lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER; + if (!tags.empty()) { + tags.pop_back(); + lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER; + } return true; } return false; From 2eda6161d67d80a6ee9f2d2d018b893637928a2c Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 7 Aug 2018 12:24:25 -0400 Subject: [PATCH 2/7] Avoid integer truncation when serializing tag lengths A tag longer than 255 characters will be incorrectly serialized. Co-authored-by: Rahul Zhade --- src/scanner.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scanner.cc b/src/scanner.cc index 31feb38..0077c98 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -34,6 +34,7 @@ struct Scanner { Tag &tag = tags[j]; if (tag.type == CUSTOM) { unsigned name_length = tag.custom_tag_name.size(); + if (name_length > UINT8_MAX) break; if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; buffer[i++] = static_cast(tag.type); buffer[i++] = name_length; From e541c9b64b829761fae3389d1759a72b1d3ccc94 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 7 Aug 2018 12:29:06 -0400 Subject: [PATCH 3/7] Avoid integer cast issues when deserializing tags `buffer` contains signed chars, so if a tag length is greater than 127 then it is treated as a negative value when deserializing. The negative signed char is then implicitly cast to a large unsigned integer. 
Explicitly cast the values to unsigned chars Co-authored-by: Rahul Zhade --- src/scanner.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanner.cc b/src/scanner.cc index 0077c98..69e595b 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -60,7 +60,7 @@ struct Scanner { Tag &tag = tags[j]; tag.type = static_cast(buffer[i++]); if (tag.type == CUSTOM) { - unsigned name_length = buffer[i++]; + unsigned name_length = (unsigned char)buffer[i++]; tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]); i += name_length; } From 5877d9a30de13ddafac596ba94373ed40efec60d Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 7 Aug 2018 12:44:20 -0400 Subject: [PATCH 4/7] Prevent out-of-bounds read when deserializing We first serialize the total number of tags, then serialize each individual tag. If we don't have enough space to serialize a particular tag we stop serializing any remaining tags. However, this causes an out-of-bounds read when deserializing because there are fewer tags than expected. Just bail when there are too many tags to serialize. 
--- src/scanner.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 69e595b..dc1be41 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -34,14 +34,14 @@ struct Scanner { Tag &tag = tags[j]; if (tag.type == CUSTOM) { unsigned name_length = tag.custom_tag_name.size(); - if (name_length > UINT8_MAX) break; - if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; + if (name_length > UINT8_MAX) return 0; + if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; buffer[i++] = static_cast(tag.type); buffer[i++] = name_length; tag.custom_tag_name.copy(&buffer[i], name_length); i += name_length; } else { - if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; + if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; buffer[i++] = static_cast(tag.type); } } From 37b1bd4eef902785613e6aa9bf1a7b937b5126a3 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 7 Aug 2018 16:38:54 -0400 Subject: [PATCH 5/7] Serialize as many tags as possible --- src/scanner.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index dc1be41..f4b83a5 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -26,25 +26,26 @@ struct Scanner { Scanner() {} unsigned serialize(char *buffer) { - unsigned i = 0; - unsigned n = tags.size(); - std::memcpy(buffer, &n, sizeof(n)); - i += sizeof(n); - for (unsigned j = 0; j < n; j++) { - Tag &tag = tags[j]; + unsigned tag_count = 0; + unsigned i = sizeof(tag_count); + + for (unsigned n = tags.size(); tag_count < n; tag_count++) { + Tag &tag = tags[tag_count]; if (tag.type == CUSTOM) { unsigned name_length = tag.custom_tag_name.size(); - if (name_length > UINT8_MAX) return 0; - if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; + if (name_length > UINT8_MAX) break; + if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; buffer[i++] = 
static_cast(tag.type); buffer[i++] = name_length; tag.custom_tag_name.copy(&buffer[i], name_length); i += name_length; } else { - if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0; + if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; buffer[i++] = static_cast(tag.type); } } + + std::memcpy(buffer, &tag_count, sizeof(tag_count)); return i; } From 0f2d7e10839739dd753e187af77c26c517f757fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Aug 2018 11:01:00 -0700 Subject: [PATCH 6/7] Always serialize the full depth of the tag stack --- src/scanner.cc | 33 +++++++++++++++++++++------------ src/tag.h | 8 +++++++- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index f4b83a5..99f1b71 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -26,14 +26,18 @@ struct Scanner { Scanner() {} unsigned serialize(char *buffer) { - unsigned tag_count = 0; - unsigned i = sizeof(tag_count); + uint16_t tag_count = tags.size() > UINT16_MAX ? 
UINT16_MAX : tags.size(); + uint16_t serialized_tag_count = 0; - for (unsigned n = tags.size(); tag_count < n; tag_count++) { - Tag &tag = tags[tag_count]; + unsigned i = sizeof(tag_count); + std::memcpy(&buffer[i], &tag_count, sizeof(tag_count)); + i += sizeof(tag_count); + + for (; serialized_tag_count < tag_count; serialized_tag_count++) { + Tag &tag = tags[serialized_tag_count]; if (tag.type == CUSTOM) { unsigned name_length = tag.custom_tag_name.size(); - if (name_length > UINT8_MAX) break; + if (name_length > UINT8_MAX) name_length = UINT8_MAX; if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break; buffer[i++] = static_cast(tag.type); buffer[i++] = name_length; @@ -45,7 +49,7 @@ struct Scanner { } } - std::memcpy(buffer, &tag_count, sizeof(tag_count)); + std::memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count)); return i; } @@ -53,15 +57,20 @@ struct Scanner { tags.clear(); if (length > 0) { unsigned i = 0; - unsigned n; - std::memcpy(&n, buffer, sizeof(n)); - i += sizeof(n); - tags.resize(n); - for (unsigned j = 0; j < n; j++) { + uint16_t tag_count, serialized_tag_count; + + std::memcpy(&serialized_tag_count, &buffer[i], sizeof(serialized_tag_count)); + i += sizeof(serialized_tag_count); + + std::memcpy(&tag_count, &buffer[i], sizeof(tag_count)); + i += sizeof(tag_count); + + tags.resize(tag_count); + for (unsigned j = 0; j < serialized_tag_count; j++) { Tag &tag = tags[j]; tag.type = static_cast(buffer[i++]); if (tag.type == CUSTOM) { - unsigned name_length = (unsigned char)buffer[i++]; + uint16_t name_length = (uint16_t)buffer[i++]; tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]); i += name_length; } diff --git a/src/tag.h b/src/tag.h index da9420c..22d1e93 100644 --- a/src/tag.h +++ b/src/tag.h @@ -310,7 +310,13 @@ struct Tag { TagType type; string custom_tag_name; - Tag() : type(DIV) {} + // This default constructor is used in the case where there is not enough space + // in the serialization buffer 
to store all of the tags. In that case, tags + // that cannot be serialized will be treated as having an unknown type. These + // tags will be closed via implicit end tags regardless of which closing + // tag is encountered next. + Tag() : type(END_OF_VOID_TAGS) {} + Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {} bool operator==(const Tag &other) const { From 461343bc178c0a83e9f1e93ecfbf0df196463c08 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 29 Aug 2018 11:33:22 -0700 Subject: [PATCH 7/7] Try using the default clang on travis --- .travis.yml | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1ed42ad..98f7a6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,21 +2,10 @@ language: node_js sudo: false -node_js: - - "node" - -compiler: clang-3.6 +node_js: node env: - - CXX=clang-3.6 - -addons: - apt: - sources: - - llvm-toolchain-precise-3.6 - - ubuntu-toolchain-r-test - packages: - - clang-3.6 + - CXX=clang branches: only: