diff -Nru tree-sitter-0.20.1/Cargo.lock tree-sitter-0.20.3/Cargo.lock --- tree-sitter-0.20.1/Cargo.lock 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/Cargo.lock 2022-01-22 00:36:45.000000000 +0000 @@ -592,12 +592,6 @@ checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e" [[package]] -name = "spin" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13287b4da9d1207a4f4929ac390916d64eacfe236a487e9a9f5b3be392be5162" - -[[package]] name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -715,22 +709,22 @@ [[package]] name = "tree-sitter" -version = "0.20.1" +version = "0.20.3" dependencies = [ "cc", "lazy_static", "regex", - "spin", ] [[package]] name = "tree-sitter-cli" -version = "0.20.1" +version = "0.20.3" dependencies = [ "ansi_term 0.12.1", "anyhow", "atty", "clap", + "ctor", "difference", "dirs", "glob", @@ -744,7 +738,6 @@ "regex-syntax", "rustc-hash", "serde", - "serde_derive", "serde_json", "smallbitvec", "tempfile", @@ -781,7 +774,7 @@ [[package]] name = "tree-sitter-loader" -version = "0.19.0" +version = "0.20.0" dependencies = [ "anyhow", "cc", @@ -798,7 +791,7 @@ [[package]] name = "tree-sitter-tags" -version = "0.20.0" +version = "0.20.1" dependencies = [ "memchr", "regex", diff -Nru tree-sitter-0.20.1/cli/Cargo.toml tree-sitter-0.20.3/cli/Cargo.toml --- tree-sitter-0.20.1/cli/Cargo.toml 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/Cargo.toml 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.20.1" +version = "0.20.3" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" @@ -32,8 +32,7 @@ regex = "1" regex-syntax = "0.6.4" rustc-hash = "1" -serde = "1.0" -serde_derive = "1.0" +serde = { version = "1.0.130", features = ["derive"] } smallbitvec = "2.5.1" tiny_http = "0.8" walkdir = "2.3" @@ -44,11 +43,6 @@ version = "0.20" path = "../lib" -[dev-dependencies.tree-sitter] -version = "0.20" -path = "../lib" -features = ["allocation-tracking"] - [dependencies.tree-sitter-config] version = "0.19.0" path = "config" @@ -58,7 +52,7 @@ path = "../highlight" [dependencies.tree-sitter-loader] -version = "0.19.0" +version = "0.20" path = "loader" [dependencies.tree-sitter-tags] @@ -77,6 +71,7 @@ rand = "0.8" tempfile = "3" pretty_assertions = "0.7.2" +ctor = "0.1" [build-dependencies] toml = "0.5" diff -Nru tree-sitter-0.20.1/cli/config/Cargo.toml tree-sitter-0.20.3/cli/config/Cargo.toml --- tree-sitter-0.20.1/cli/config/Cargo.toml 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/config/Cargo.toml 2022-01-22 00:36:45.000000000 +0000 @@ -13,7 +13,7 @@ [dependencies] anyhow = "1.0" dirs = "3.0" -serde = "1.0" +serde = { version = "1.0.130", features = ["derive"] } [dependencies.serde_json] version = "1.0" diff -Nru tree-sitter-0.20.1/cli/loader/Cargo.toml tree-sitter-0.20.3/cli/loader/Cargo.toml --- tree-sitter-0.20.1/cli/loader/Cargo.toml 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/loader/Cargo.toml 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,7 @@ [package] name = "tree-sitter-loader" description = "Locates, builds, and loads tree-sitter grammars at runtime" -version = "0.19.0" +version = "0.20.0" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" diff -Nru tree-sitter-0.20.1/cli/npm/package.json tree-sitter-0.20.3/cli/npm/package.json --- tree-sitter-0.20.1/cli/npm/package.json 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/npm/package.json 2022-01-22 00:36:45.000000000 +0000 @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.20.1", + "version": "0.20.3", "author": "Max Brunsfeld", "license": "MIT", "repository": { diff -Nru tree-sitter-0.20.1/cli/src/generate/grammars.rs tree-sitter-0.20.3/cli/src/generate/grammars.rs --- tree-sitter-0.20.1/cli/src/generate/grammars.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/grammars.rs 2022-01-22 00:36:45.000000000 +0000 @@ -26,7 +26,7 @@ Symbol(String), } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct InputGrammar { pub name: String, pub variables: Vec, @@ -66,7 +66,7 @@ pub field_name: Option, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct Production { pub steps: Vec, pub dynamic_precedence: i32, @@ -159,15 +159,6 @@ } } -impl Default for Production { - fn default() -> Self { - Production { - dynamic_precedence: 0, - steps: Vec::new(), - } - } -} - #[cfg(test)] impl Variable { pub fn named(name: &str, rule: Rule) -> Self { diff -Nru tree-sitter-0.20.1/cli/src/generate/mod.rs tree-sitter-0.20.3/cli/src/generate/mod.rs --- tree-sitter-0.20.1/cli/src/generate/mod.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/mod.rs 2022-01-22 00:36:45.000000000 +0000 @@ -40,7 +40,7 @@ pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, - next_abi: bool, + abi_version: usize, generate_bindings: bool, report_symbol_name: Option<&str>, ) -> Result<()> { @@ -80,14 +80,14 @@ lexical_grammar, inlines, simple_aliases, - next_abi, + abi_version, report_symbol_name, )?; write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; - if next_abi { + if abi_version == tree_sitter::LANGUAGE_VERSION { write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; } @@ -109,7 +109,7 @@ lexical_grammar, inlines, simple_aliases, - true, + tree_sitter::LANGUAGE_VERSION, None, )?; Ok((input_grammar.name, parser.c_code)) @@ -121,7 +121,7 @@ lexical_grammar: LexicalGrammar, inlines: InlinedProductionMap, simple_aliases: AliasMap, - next_abi: bool, + abi_version: usize, report_symbol_name: Option<&str>, ) -> Result { let variable_info = @@ -149,7 +149,7 @@ syntax_grammar, lexical_grammar, simple_aliases, - next_abi, + abi_version, ); Ok(GeneratedParser { c_code, diff -Nru tree-sitter-0.20.1/cli/src/generate/node_types.rs tree-sitter-0.20.3/cli/src/generate/node_types.rs --- tree-sitter-0.20.1/cli/src/generate/node_types.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/node_types.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,7 @@ use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use anyhow::{anyhow, Result}; -use serde_derive::Serialize; +use serde::Serialize; use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; @@ -725,14 +725,6 @@ #[test] fn test_node_types_simple() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -755,6 +747,7 @@ rule: Rule::string("y"), }, ], + ..Default::default() }); assert_eq!(node_types.len(), 3); @@ -821,14 +814,7 @@ #[test] fn test_node_types_simple_extras() { let node_types = get_node_types(InputGrammar { - name: String::new(), extra_symbols: vec![Rule::named("v3")], - word_token: None, - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -852,6 +838,7 @@ rule: Rule::string("y"), }, ], + ..Default::default() }); assert_eq!(node_types.len(), 4); @@ -928,13 +915,6 @@ #[test] fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], supertype_symbols: vec!["_v2".to_string()], variables: vec![ Variable { @@ -962,6 +942,7 @@ rule: Rule::string("y"), }, ], + ..Default::default() }); assert_eq!( @@ -1016,14 +997,6 @@ #[test] fn test_node_types_for_children_without_fields() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -1054,6 +1027,7 @@ rule: Rule::string("y"), }, ], + ..Default::default() }); assert_eq!( @@ -1115,13 +1089,6 @@ #[test] fn test_node_types_with_inlined_rules() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - precedence_orderings: vec![], variables_to_inline: vec!["v2".to_string()], variables: vec![ Variable { @@ -1141,6 +1108,7 @@ rule: Rule::string("b"), }, ], + ..Default::default() }); assert_eq!( @@ -1171,14 +1139,6 @@ #[test] fn test_node_types_for_aliased_nodes() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "thing".to_string(), @@ -1220,6 +1180,7 @@ rule: Rule::pattern("[\\w-]+"), }, ], + ..Default::default() }); assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None); @@ -1248,14 +1209,6 @@ #[test] fn test_node_types_with_multiple_valued_fields() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "a".to_string(), @@ -1279,6 +1232,7 @@ rule: Rule::string("c"), }, ], + ..Default::default() }); assert_eq!( @@ -1317,14 +1271,6 @@ #[test] fn test_node_types_with_fields_on_hidden_tokens() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![Variable { name: "script".to_string(), kind: VariableType::Named, @@ -1333,6 +1279,7 @@ Rule::field("b".to_string(), Rule::pattern("bye")), ]), }], + ..Default::default() }); assert_eq!( @@ -1350,14 +1297,6 @@ #[test] fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "script".to_string(), @@ -1386,6 +1325,7 @@ ]), }, ], + ..Default::default() }); assert_eq!( @@ -1477,14 +1417,6 @@ #[test] fn test_node_types_with_tokens_aliased_to_match_rules() { let node_types = get_node_types(InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], variables: vec![ Variable { name: "a".to_string(), @@ -1508,6 +1440,7 @@ ]), }, ], + ..Default::default() }); assert_eq!( diff -Nru tree-sitter-0.20.1/cli/src/generate/parse_grammar.rs tree-sitter-0.20.3/cli/src/generate/parse_grammar.rs --- tree-sitter-0.20.1/cli/src/generate/parse_grammar.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/parse_grammar.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,7 @@ use super::grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType}; use super::rules::{Precedence, Rule}; use anyhow::{anyhow, Result}; -use serde_derive::Deserialize; +use serde::Deserialize; use serde_json::{Map, Value}; #[derive(Deserialize)] diff -Nru tree-sitter-0.20.1/cli/src/generate/prepare_grammar/expand_repeats.rs tree-sitter-0.20.3/cli/src/generate/prepare_grammar/expand_repeats.rs --- tree-sitter-0.20.1/cli/src/generate/prepare_grammar/expand_repeats.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/prepare_grammar/expand_repeats.rs 2022-01-22 00:36:45.000000000 +0000 @@ -283,13 +283,7 @@ fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { ExtractedSyntaxGrammar { variables, - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - precedence_orderings: Vec::new(), - word_token: None, + ..Default::default() } } } diff -Nru tree-sitter-0.20.1/cli/src/generate/prepare_grammar/extract_default_aliases.rs tree-sitter-0.20.3/cli/src/generate/prepare_grammar/extract_default_aliases.rs --- tree-sitter-0.20.1/cli/src/generate/prepare_grammar/extract_default_aliases.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/prepare_grammar/extract_default_aliases.rs 2022-01-22 00:36:45.000000000 +0000 @@ -197,13 +197,7 @@ }], }, ], - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], - precedence_orderings: vec![], - word_token: None, + ..Default::default() }; let lexical_grammar = LexicalGrammar { diff -Nru tree-sitter-0.20.1/cli/src/generate/prepare_grammar/extract_tokens.rs tree-sitter-0.20.3/cli/src/generate/prepare_grammar/extract_tokens.rs --- tree-sitter-0.20.1/cli/src/generate/prepare_grammar/extract_tokens.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/prepare_grammar/extract_tokens.rs 2022-01-22 00:36:45.000000000 +0000 @@ -493,13 +493,7 @@ fn build_grammar(variables: Vec) -> InternedGrammar { InternedGrammar { variables, - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - precedence_orderings: Vec::new(), - word_token: None, + ..Default::default() } } } diff -Nru tree-sitter-0.20.1/cli/src/generate/prepare_grammar/intern_symbols.rs tree-sitter-0.20.3/cli/src/generate/prepare_grammar/intern_symbols.rs --- tree-sitter-0.20.1/cli/src/generate/prepare_grammar/intern_symbols.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/prepare_grammar/intern_symbols.rs 2022-01-22 00:36:45.000000000 +0000 @@ -243,13 +243,7 @@ InputGrammar { variables, name: "the_language".to_string(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - precedence_orderings: Vec::new(), - word_token: None, + ..Default::default() } } } diff -Nru tree-sitter-0.20.1/cli/src/generate/prepare_grammar/mod.rs tree-sitter-0.20.3/cli/src/generate/prepare_grammar/mod.rs --- tree-sitter-0.20.1/cli/src/generate/prepare_grammar/mod.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/prepare_grammar/mod.rs 2022-01-22 00:36:45.000000000 +0000 @@ -47,6 +47,21 @@ pub separators: Vec, } +impl Default for IntermediateGrammar { + fn default() -> Self { + Self { + variables: Default::default(), + extra_symbols: Default::default(), + expected_conflicts: Default::default(), + precedence_orderings: Default::default(), + external_tokens: Default::default(), + variables_to_inline: Default::default(), + supertype_symbols: Default::default(), + word_token: Default::default(), + } + } +} + /// Transform an input grammar into separate components that are ready /// for parse table construction. pub(crate) fn prepare_grammar( @@ -158,13 +173,6 @@ #[test] fn test_validate_precedences_with_undeclared_precedence() { let grammar = InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], precedence_orderings: vec![ vec![ PrecedenceEntry::Name("a".to_string()), @@ -194,6 +202,7 @@ ])), }, ], + ..Default::default() }; let result = validate_precedences(&grammar); @@ -206,13 +215,6 @@ #[test] fn test_validate_precedences_with_conflicting_order() { let grammar = InputGrammar { - name: String::new(), - word_token: None, - extra_symbols: vec![], - external_tokens: vec![], - supertype_symbols: vec![], - expected_conflicts: vec![], - variables_to_inline: vec![], precedence_orderings: vec![ vec![ PrecedenceEntry::Name("a".to_string()), @@ -242,6 +244,7 @@ ])), }, ], + ..Default::default() }; let result = validate_precedences(&grammar); diff -Nru tree-sitter-0.20.1/cli/src/generate/render.rs tree-sitter-0.20.3/cli/src/generate/render.rs --- tree-sitter-0.20.1/cli/src/generate/render.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/generate/render.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,18 +1,25 @@ -use super::char_tree::{CharacterTree, Comparator}; -use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; -use super::rules::{Alias, AliasMap, Symbol, SymbolType}; -use super::tables::{ - AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, - ParseTableEntry, +use super::{ + char_tree::{CharacterTree, Comparator}, + grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}, + rules::{Alias, AliasMap, Symbol, SymbolType}, + tables::{ + AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, + ParseTableEntry, + }, }; use core::ops::Range; -use std::cmp; -use std::collections::{HashMap, HashSet}; -use std::fmt::Write; -use std::mem::swap; +use std::{ + cmp, + collections::{HashMap, HashSet}, + fmt::Write, + mem::swap, +}; const LARGE_CHARACTER_RANGE_COUNT: usize = 8; const SMALL_STATE_THRESHOLD: usize = 64; +const ABI_VERSION_MIN: usize = 13; +const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION; +const ABI_VERSION_WITH_PRIMARY_STATES: usize = 14; macro_rules! add { ($this: tt, $($arg: tt)*) => {{ @@ -69,7 +76,7 @@ field_names: Vec, #[allow(unused)] - next_abi: bool, + abi_version: usize, } struct TransitionSummary { @@ -106,6 +113,7 @@ } self.add_non_terminal_alias_map(); + self.add_primary_state_id_list(); let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); @@ -290,16 +298,7 @@ }) .count(); - add_line!( - self, - "#define LANGUAGE_VERSION {}", - if self.next_abi { - tree_sitter::LANGUAGE_VERSION - } else { - tree_sitter::LANGUAGE_VERSION - 1 - } - ); - + add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version); add_line!( self, "#define STATE_COUNT {}", @@ -565,6 +564,29 @@ add_line!(self, ""); } + /// Produces a list of the "primary state" for every state in the grammar. + /// + /// The "primary state" for a given state is the first encountered state that behaves + /// identically with respect to query analysis. We derive this by keeping track of the `core_id` + /// for each state and treating the first state with a given `core_id` as primary. + fn add_primary_state_id_list(&mut self) { + add_line!( + self, + "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{" + ); + indent!(self); + let mut first_state_for_each_core_id = HashMap::new(); + for (idx, state) in self.parse_table.states.iter().enumerate() { + let primary_state = first_state_for_each_core_id + .entry(state.core_id) + .or_insert(idx); + add_line!(self, "[{}] = {},", idx, primary_state); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; @@ -1339,9 +1361,7 @@ add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); add_line!(self, ".state_count = STATE_COUNT,"); add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); - if self.next_abi { - add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,"); - } + add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,"); add_line!(self, ".field_count = FIELD_COUNT,"); add_line!( self, @@ -1396,6 +1416,10 @@ add_line!(self, "}},"); } + if self.abi_version >= ABI_VERSION_WITH_PRIMARY_STATES { + add_line!(self, ".primary_state_ids = ts_primary_state_ids,"); + } + dedent!(self); add_line!(self, "}};"); add_line!(self, "return &language;"); @@ -1602,8 +1626,9 @@ /// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. -/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse -/// table format. This is mainly used for testing, when developing Tree-sitter itself. +/// * `abi_version` - The language ABI version that should be generated. Usually +/// you want Tree-sitter's current version, but right after making an ABI +/// change, it may be useful to generate code with the previous ABI. pub(crate) fn render_c_code( name: &str, parse_table: ParseTable, @@ -1613,8 +1638,15 @@ syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, default_aliases: AliasMap, - next_abi: bool, + abi_version: usize, ) -> String { + if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) { + panic!( + "This version of Tree-sitter can only generate parsers with ABI version {} - {}, not {}", + ABI_VERSION_MIN, ABI_VERSION_MAX, abi_version + ); + } + Generator { buffer: String::new(), indent_level: 0, @@ -1633,7 +1665,7 @@ symbol_map: HashMap::new(), unique_aliases: Vec::new(), field_names: Vec::new(), - next_abi, + abi_version, } .generate() } diff -Nru tree-sitter-0.20.1/cli/src/highlight.rs tree-sitter-0.20.3/cli/src/highlight.rs --- tree-sitter-0.20.1/cli/src/highlight.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/highlight.rs 2022-01-22 00:36:45.000000000 +0000 @@ -157,6 +157,7 @@ "function": 26, "keyword": 56, "number": {"color": 94, "bold": true}, + "module": 136, "property": 124, "operator": {"color": 239, "bold": true}, "punctuation.bracket": 239, diff -Nru tree-sitter-0.20.1/cli/src/main.rs tree-sitter-0.20.3/cli/src/main.rs --- tree-sitter-0.20.1/cli/src/main.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/main.rs 2022-01-22 00:36:45.000000000 +0000 @@ -11,6 +11,7 @@ const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA"); +const DEFAULT_GENERATE_ABI_VERSION: usize = 13; fn main() { let result = run(); @@ -90,7 +91,19 @@ .about("Generate a parser") .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) - .arg(Arg::with_name("prev-abi").long("prev-abi")) + .arg( + Arg::with_name("abi-version") + .long("abi") + .value_name("version") + .help(&format!( + concat!( + "Select the language ABI version to generate (default {}).\n", + "Use --abi=latest to generate the newest supported version ({}).", + ), + DEFAULT_GENERATE_ABI_VERSION, + tree_sitter::LANGUAGE_VERSION, + )), + ) .arg(Arg::with_name("no-bindings").long("no-bindings")) .arg( Arg::with_name("report-states-for-rule") @@ -266,12 +279,21 @@ if matches.is_present("log") { logger::init(); } - let new_abi = !matches.is_present("prev-abi"); + let abi_version = + matches + .value_of("abi-version") + .map_or(DEFAULT_GENERATE_ABI_VERSION, |version| { + if version == "latest" { + tree_sitter::LANGUAGE_VERSION + } else { + version.parse().expect("invalid abi version flag") + } + }); let generate_bindings = !matches.is_present("no-bindings"); generate::generate_parser_in_directory( ¤t_dir, grammar_path, - new_abi, + abi_version, generate_bindings, report_symbol_name, )?; diff -Nru tree-sitter-0.20.1/cli/src/playground.html tree-sitter-0.20.3/cli/src/playground.html --- tree-sitter-0.20.1/cli/src/playground.html 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/playground.html 2022-01-22 00:36:45.000000000 +0000 @@ -166,6 +166,7 @@ .query-error { text-decoration: underline red dashed; + -webkit-text-decoration: underline red dashed; } diff -Nru tree-sitter-0.20.1/cli/src/playground.rs tree-sitter-0.20.3/cli/src/playground.rs --- tree-sitter-0.20.1/cli/src/playground.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/playground.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,39 +1,48 @@ use super::wasm; use anyhow::Context; -use std::env; -use std::fs; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; -use std::str::FromStr; +use std::{ + borrow::Cow, + env, fs, + net::TcpListener, + path::{Path, PathBuf}, + str::{self, FromStr as _}, +}; use tiny_http::{Header, Response, Server}; use webbrowser; -macro_rules! resource { +macro_rules! optional_resource { ($name: tt, $path: tt) => { #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] - fn $name(tree_sitter_dir: &Option) -> Vec { + fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { - fs::read(tree_sitter_dir.join($path)).unwrap() + Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { - include_bytes!(concat!("../../", $path)).to_vec() + Cow::Borrowed(include_bytes!(concat!("../../", $path))) } } #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] - fn $name(tree_sitter_dir: &Option) -> Vec { + fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { - fs::read(tree_sitter_dir.join($path)).unwrap() + Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { - Vec::new() + Cow::Borrowed(&[]) } } }; } -resource!(get_main_html, "cli/src/playground.html"); -resource!(get_playground_js, "docs/assets/js/playground.js"); -resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); -resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); +optional_resource!(get_playground_js, "docs/assets/js/playground.js"); +optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); +optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); + +fn get_main_html(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + if let Some(tree_sitter_dir) = tree_sitter_dir { + Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) + } else { + Cow::Borrowed(include_bytes!("playground.html")) + } +} pub fn serve(grammar_path: &Path, open_in_browser: bool) { let port = get_available_port().expect("Couldn't find an available port"); @@ -60,7 +69,7 @@ } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - let main_html = String::from_utf8(get_main_html(&tree_sitter_dir)) + let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir)) .unwrap() .replace("THE_LANGUAGE_NAME", &grammar_name) .into_bytes(); diff -Nru tree-sitter-0.20.1/cli/src/test.rs tree-sitter-0.20.3/cli/src/test.rs --- tree-sitter-0.20.1/cli/src/test.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/test.rs 2022-01-22 00:36:45.000000000 +0000 @@ -16,7 +16,7 @@ lazy_static! { static ref HEADER_REGEX: ByteRegex = - ByteRegexBuilder::new(r"^===+(?P[^=\r\n][^\r\n]*)?\r?\n(?P[^=\r\n][^\r\n]*)\r?\n===+(?P[^=\r\n][^\r\n]*)?\r?\n") + ByteRegexBuilder::new(r"^===+(?P[^=\r\n][^\r\n]*)?\r?\n(?P([^=\r\n][^\r\n]*\r?\n)+)===+(?P[^=\r\n][^\r\n]*)?\r?\n") .multi_line(true) .build() .unwrap(); @@ -296,11 +296,13 @@ let mut c_iter = s.chars(); c_iter.next(); - let second_char = c_iter.next().unwrap(); - if second_char == 'M' || second_char == 'U' { - // "(MISSING node_name" or "(UNEXPECTED 'x'" - let s = s_iter.next().unwrap(); - write!(formatted, " {}", s).unwrap(); + match c_iter.next() { + Some('M') | Some('U') => { + // "(MISSING node_name" or "(UNEXPECTED 'x'" + let s = s_iter.next().unwrap(); + write!(formatted, " {}", s).unwrap(); + } + Some(_) | None => {} } } else if s.ends_with(':') { // "field:" @@ -402,7 +404,7 @@ let header_range = c.get(0).unwrap().range(); let test_name = c .name("test_name") - .map(|c| String::from_utf8_lossy(c.as_bytes()).to_string()); + .map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string()); Some((header_range, test_name)) } else { None @@ -594,6 +596,7 @@ .trim() .to_string() ); + assert_eq!(format_sexp(&"()".to_string()), "()".to_string()); } #[test] @@ -788,4 +791,52 @@ } ); } + + #[test] + fn test_parse_test_content_with_newlines_in_test_names() { + let entry = parse_test_content( + "the-filename".to_string(), + r#" +=============== +name +with +newlines +=============== +a +--- +(b) + +==================== +name with === signs +==================== +code with ---- +--- +(d) +"# + .to_string(), + None, + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + file_path: None, + children: vec![ + TestEntry::Example { + name: "name\nwith\nnewlines".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + has_fields: false, + }, + TestEntry::Example { + name: "name with === signs".to_string(), + input: b"code with ----".to_vec(), + output: "(d)".to_string(), + has_fields: false, + } + ] + } + ); + } } diff -Nru tree-sitter-0.20.1/cli/src/tests/corpus_test.rs tree-sitter-0.20.3/cli/src/tests/corpus_test.rs --- tree-sitter-0.20.1/cli/src/tests/corpus_test.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/corpus_test.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,4 +1,5 @@ use super::helpers::{ + allocations, edits::{get_random_edit, invert_edit}, fixtures::{fixtures_dir, get_language, get_test_language}, random::Rand, @@ -11,8 +12,8 @@ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use std::{fs, usize}; -use tree_sitter::{allocations, LogType, Node, Parser, Tree}; +use std::fs; +use tree_sitter::{LogType, Node, Parser, Tree}; const EDIT_COUNT: usize = 3; const TRIAL_COUNT: usize = 10; diff -Nru tree-sitter-0.20.1/cli/src/tests/helpers/allocations.rs tree-sitter-0.20.3/cli/src/tests/helpers/allocations.rs --- tree-sitter-0.20.1/cli/src/tests/helpers/allocations.rs 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/helpers/allocations.rs 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,119 @@ +use std::{ + collections::HashMap, + os::raw::c_void, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering::SeqCst}, + Mutex, + }, +}; + +#[ctor::ctor] +unsafe fn initialize_allocation_recording() { + tree_sitter::set_allocator( + Some(ts_record_malloc), + Some(ts_record_calloc), + Some(ts_record_realloc), + Some(ts_record_free), + ); +} + +#[derive(Debug, PartialEq, Eq, Hash)] +struct Allocation(*const c_void); +unsafe impl Send for Allocation {} +unsafe impl Sync for Allocation {} + +#[derive(Default)] +struct AllocationRecorder { + enabled: AtomicBool, + allocation_count: AtomicU64, + outstanding_allocations: Mutex>, +} + +thread_local! { + static RECORDER: AllocationRecorder = Default::default(); +} + +extern "C" { + fn malloc(size: usize) -> *mut c_void; + fn calloc(count: usize, size: usize) -> *mut c_void; + fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void; + fn free(ptr: *mut c_void); +} + +pub fn record(f: impl FnOnce() -> T) -> T { + RECORDER.with(|recorder| { + recorder.enabled.store(true, SeqCst); + recorder.allocation_count.store(0, SeqCst); + recorder.outstanding_allocations.lock().unwrap().clear(); + }); + + let value = f(); + + let outstanding_allocation_indices = RECORDER.with(|recorder| { + recorder.enabled.store(false, SeqCst); + recorder.allocation_count.store(0, SeqCst); + recorder + .outstanding_allocations + .lock() + .unwrap() + .drain() + .map(|e| e.1) + .collect::>() + }); + if !outstanding_allocation_indices.is_empty() { + panic!( + "Leaked allocation indices: {:?}", + outstanding_allocation_indices + ); + } + value +} + +fn record_alloc(ptr: *mut c_void) { + RECORDER.with(|recorder| { + if recorder.enabled.load(SeqCst) { + let count = recorder.allocation_count.fetch_add(1, SeqCst); + recorder + .outstanding_allocations + .lock() + .unwrap() + .insert(Allocation(ptr), count); + } + }); +} + +fn record_dealloc(ptr: *mut c_void) { + RECORDER.with(|recorder| { + if recorder.enabled.load(SeqCst) { + recorder + .outstanding_allocations + .lock() + .unwrap() + .remove(&Allocation(ptr)); + } + }); +} + +unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void { + let result = malloc(size); + record_alloc(result); + result +} + +unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void { + let result = calloc(count, size); + record_alloc(result); + result +} + +unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { + record_dealloc(ptr); + let result = realloc(ptr, size); + record_alloc(result); + result +} + +unsafe extern "C" fn ts_record_free(ptr: *mut c_void) { + record_dealloc(ptr); + free(ptr); +} diff -Nru tree-sitter-0.20.1/cli/src/tests/helpers/mod.rs tree-sitter-0.20.3/cli/src/tests/helpers/mod.rs --- tree-sitter-0.20.1/cli/src/tests/helpers/mod.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/helpers/mod.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,3 +1,4 @@ +pub(super) mod allocations; pub(super) mod edits; pub(super) mod fixtures; pub(super) mod query_helpers; diff -Nru tree-sitter-0.20.1/cli/src/tests/parser_test.rs tree-sitter-0.20.3/cli/src/tests/parser_test.rs --- tree-sitter-0.20.1/cli/src/tests/parser_test.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/parser_test.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,10 +1,13 @@ -use super::helpers::edits::ReadRecorder; -use super::helpers::fixtures::{get_language, get_test_grammar, get_test_language}; +use super::helpers::{ + allocations, + edits::ReadRecorder, + fixtures::{get_language, get_test_grammar, get_test_language}, +}; use crate::generate::generate_parser_for_grammar; use crate::parse::{perform_edit, Edit}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{thread, time}; -use tree_sitter::{allocations, IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; +use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; #[test] fn test_parsing_simple_string() { diff -Nru tree-sitter-0.20.1/cli/src/tests/pathological_test.rs tree-sitter-0.20.3/cli/src/tests/pathological_test.rs --- tree-sitter-0.20.1/cli/src/tests/pathological_test.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/pathological_test.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,5 +1,5 @@ -use super::helpers::fixtures::get_language; -use tree_sitter::{allocations, Parser}; +use super::helpers::{allocations, fixtures::get_language}; +use tree_sitter::Parser; #[test] fn test_pathological_example_1() { diff -Nru tree-sitter-0.20.1/cli/src/tests/query_test.rs tree-sitter-0.20.3/cli/src/tests/query_test.rs --- tree-sitter-0.20.1/cli/src/tests/query_test.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/query_test.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,4 +1,5 @@ use super::helpers::{ + allocations, fixtures::get_language, query_helpers::{Match, Pattern}, }; @@ -6,7 +7,7 @@ use rand::{prelude::StdRng, SeedableRng}; use std::{env, fmt::Write}; use tree_sitter::{ - allocations, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, + CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, }; @@ -3091,6 +3092,53 @@ } #[test] +fn test_query_captures_with_matches_removed_before_they_finish() { + allocations::record(|| { + let language = get_language("javascript"); + // When Tree-sitter detects that a pattern is guaranteed to match, + // it will start to eagerly return the captures that it has found, + // even though it hasn't matched the entire pattern yet. A + // namespace_import node always has "*", "as" and then an identifier + // for children, so captures will be emitted eagerly for this pattern. + let query = Query::new( + language, + r#" + (namespace_import + "*" @star + "as" @as + (identifier) @identifier) + "#, + ) + .unwrap(); + + let source = " + import * as name from 'module-name'; + "; + + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(&source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + let mut captured_strings = Vec::new(); + for (m, i) in cursor.captures(&query, tree.root_node(), source.as_bytes()) { + let capture = m.captures[i]; + let text = capture.node.utf8_text(source.as_bytes()).unwrap(); + if text == "as" { + m.remove(); + continue; + } + captured_strings.push(text); + } + + // .remove() removes the match before it is finished. The identifier + // "name" is part of this match, so we expect that removing the "as" + // capture from the match should prevent "name" from matching: + assert_eq!(captured_strings, &["*",]); + }); +} + +#[test] fn test_query_captures_and_matches_iterators_are_fused() { allocations::record(|| { let language = get_language("javascript"); @@ -3814,6 +3862,243 @@ ) } } + }); +} + +#[test] +fn test_capture_quantifiers() { + struct Row { + description: &'static str, + language: Language, + pattern: &'static str, + capture_quantifiers: &'static [(usize, &'static str, CaptureQuantifier)], + } + + let rows = &[ + // Simple quantifiers + Row { + description: "Top level capture", + language: get_language("python"), + pattern: r#" + (module) @mod + "#, + capture_quantifiers: &[(0, "mod", CaptureQuantifier::One)], + }, + Row { + description: "Nested list capture capture", + language: get_language("javascript"), + pattern: r#" + (array (_)* @elems) @array + "#, + capture_quantifiers: &[ + (0, "array", CaptureQuantifier::One), + (0, "elems", CaptureQuantifier::ZeroOrMore), + ], + }, + Row { + description: "Nested non-empty list capture capture", + language: get_language("javascript"), + pattern: r#" + (array (_)+ @elems) @array + "#, + capture_quantifiers: &[ + (0, "array", CaptureQuantifier::One), + (0, "elems", CaptureQuantifier::OneOrMore), + ], + }, + // Nested quantifiers + Row { + description: "capture nested in optional pattern", + language: get_language("javascript"), + pattern: r#" + (array (call_expression (arguments (_) @arg))? @call) @array + "#, + capture_quantifiers: &[ + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::ZeroOrOne), + (0, "arg", CaptureQuantifier::ZeroOrOne), + ], + }, + Row { + description: "optional capture nested in non-empty list pattern", + language: get_language("javascript"), + pattern: r#" + (array (call_expression (arguments (_)? @arg))+ @call) @array + "#, + capture_quantifiers: &[ + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::OneOrMore), + (0, "arg", CaptureQuantifier::ZeroOrMore), + ], + }, + Row { + description: "non-empty list capture nested in optional pattern", + language: get_language("javascript"), + pattern: r#" + (array (call_expression (arguments (_)+ @args))? @call) @array + "#, + capture_quantifiers: &[ + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::ZeroOrOne), + (0, "args", CaptureQuantifier::ZeroOrMore), + ], + }, + // Quantifiers in alternations + Row { + description: "capture is the same in all alternatives", + language: get_language("javascript"), + pattern: r#"[ + (function_declaration name:(identifier) @name) + (call_expression function:(identifier) @name) + ]"#, + capture_quantifiers: &[(0, "name", CaptureQuantifier::One)], + }, + Row { + description: "capture appears in some alternatives", + language: get_language("javascript"), + pattern: r#"[ + (function_declaration name:(identifier) @name) + (function) + ] @fun"#, + capture_quantifiers: &[ + (0, "fun", CaptureQuantifier::One), + (0, "name", CaptureQuantifier::ZeroOrOne), + ], + }, + Row { + description: "capture has different quantifiers in alternatives", + language: get_language("javascript"), + pattern: r#"[ + (call_expression arguments:(arguments (_)+ @args)) + (new_expression arguments:(arguments (_)? @args)) + ] @call"#, + capture_quantifiers: &[ + (0, "call", CaptureQuantifier::One), + (0, "args", CaptureQuantifier::ZeroOrMore), + ], + }, + // Quantifiers in siblings + Row { + description: "siblings have different captures with different quantifiers", + language: get_language("javascript"), + pattern: r#" + (call_expression (arguments (identifier)? @self (_)* @args)) @call + "#, + capture_quantifiers: &[ + (0, "call", CaptureQuantifier::One), + (0, "self", CaptureQuantifier::ZeroOrOne), + (0, "args", CaptureQuantifier::ZeroOrMore), + ], + }, + Row { + description: "siblings have same capture with different quantifiers", + language: get_language("javascript"), + pattern: r#" + (call_expression (arguments (identifier) @args (_)* @args)) @call + "#, + capture_quantifiers: &[ + (0, "call", CaptureQuantifier::One), + (0, "args", CaptureQuantifier::OneOrMore), + ], + }, + // Combined scenarios + Row { + description: "combined nesting, alternatives, and siblings", + language: get_language("javascript"), + pattern: r#" + (array + (call_expression + (arguments [ + (identifier) @self + (_)+ @args + ]) + )+ @call + ) @array + "#, + capture_quantifiers: &[ + (0, "array", CaptureQuantifier::One), + (0, "call", CaptureQuantifier::OneOrMore), + (0, "self", CaptureQuantifier::ZeroOrMore), + (0, "args", CaptureQuantifier::ZeroOrMore), + ], + }, + // Multiple patterns + Row { + description: "multiple patterns", + language: get_language("javascript"), + pattern: r#" + (function_declaration name: (identifier) @x) + (statement_identifier) @y + (property_identifier)+ @z + (array (identifier)* @x) + "#, + capture_quantifiers: &[ + // x + (0, "x", CaptureQuantifier::One), + (1, "x", CaptureQuantifier::Zero), + (2, "x", CaptureQuantifier::Zero), + (3, "x", CaptureQuantifier::ZeroOrMore), + // y + (0, "y", CaptureQuantifier::Zero), + (1, "y", CaptureQuantifier::One), + (2, "y", CaptureQuantifier::Zero), + (3, "y", CaptureQuantifier::Zero), + // z + (0, "z", CaptureQuantifier::Zero), + (1, "z", CaptureQuantifier::Zero), + (2, "z", CaptureQuantifier::OneOrMore), + (3, "z", CaptureQuantifier::Zero), + ], + }, + Row { + description: "multiple alternatives", + language: get_language("javascript"), + pattern: r#" + [ + (array (identifier) @x) + (function_declaration name: (identifier)+ @x) + ] + [ + (array (identifier) @x) + (function_declaration name: (identifier)+ @x) + ] + "#, + capture_quantifiers: &[ + (0, "x", CaptureQuantifier::OneOrMore), + (1, "x", CaptureQuantifier::OneOrMore), + ], + }, + ]; + + allocations::record(|| { + eprintln!(""); + + for row in rows.iter() { + if let Some(filter) = EXAMPLE_FILTER.as_ref() { + if !row.description.contains(filter.as_str()) { + continue; + } + } + eprintln!(" query example: {:?}", row.description); + let query = Query::new(row.language, row.pattern).unwrap(); + for (pattern, capture, expected_quantifier) in row.capture_quantifiers { + let index = query.capture_index_for_name(capture).unwrap(); + let actual_quantifier = query.capture_quantifiers(*pattern)[index as usize]; + assert_eq!( + actual_quantifier, + *expected_quantifier, + "Description: {}, Pattern: {:?}, expected quantifier of @{} to be {:?} instead of {:?}", + row.description, + row.pattern + .split_ascii_whitespace() + .collect::>() + .join(" "), + capture, + *expected_quantifier, + actual_quantifier, + ) + } + } }); } diff -Nru tree-sitter-0.20.1/cli/src/tests/tags_test.rs tree-sitter-0.20.3/cli/src/tests/tags_test.rs --- tree-sitter-0.20.1/cli/src/tests/tags_test.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/cli/src/tests/tags_test.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,10 +1,13 @@ -use super::helpers::fixtures::{get_language, get_language_queries_path}; -use std::ffi::CStr; -use std::ffi::CString; -use std::{fs, ptr, slice, str}; -use tree_sitter::{allocations, Point}; -use tree_sitter_tags::c_lib as c; -use tree_sitter_tags::{Error, TagsConfiguration, TagsContext}; +use super::helpers::{ + allocations, + fixtures::{get_language, get_language_queries_path}, +}; +use std::{ + ffi::{CStr, CString}, + fs, ptr, slice, str, +}; +use tree_sitter::Point; +use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext}; const PYTHON_TAG_QUERY: &'static str = r#" ( diff -Nru tree-sitter-0.20.1/debian/changelog tree-sitter-0.20.3/debian/changelog --- tree-sitter-0.20.1/debian/changelog 2021-12-20 19:59:34.000000000 +0000 +++ tree-sitter-0.20.3/debian/changelog 2022-01-29 18:05:52.000000000 +0000 @@ -1,3 +1,10 @@ +tree-sitter (0.20.3-1) unstable; urgency=medium + + * New upstream release + * libtree-sitter0.symbols: Add/remove functions for 0.20.3 + + -- James McCoy Sat, 29 Jan 2022 13:05:52 -0500 + tree-sitter (0.20.1-1) unstable; urgency=medium * New upstream release diff -Nru tree-sitter-0.20.1/debian/copyright tree-sitter-0.20.3/debian/copyright --- tree-sitter-0.20.1/debian/copyright 2021-12-20 19:59:34.000000000 +0000 +++ tree-sitter-0.20.3/debian/copyright 2022-01-29 18:05:52.000000000 +0000 @@ -3,11 +3,11 @@ Source: https://tree-sitter.github.io/tree-sitter/ Files: * -Copyright: 2018-2021 Max Brunsfeld +Copyright: 2018-2022 Max Brunsfeld License: Expat Files: debian/* -Copyright: 2020-2021 James McCoy +Copyright: 2020-2022 James McCoy License: Expat Files: lib/src/unicode/* diff -Nru tree-sitter-0.20.1/debian/libtree-sitter0.symbols tree-sitter-0.20.3/debian/libtree-sitter0.symbols --- tree-sitter-0.20.1/debian/libtree-sitter0.symbols 2021-12-20 19:59:34.000000000 +0000 +++ tree-sitter-0.20.3/debian/libtree-sitter0.symbols 2022-01-29 18:05:52.000000000 +0000 @@ -1,5 +1,9 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# * Build-Depends-Package: libtree-sitter-dev + ts_current_calloc@Base 0.20.2 + ts_current_free@Base 0.20.2 + ts_current_malloc@Base 0.20.2 + ts_current_realloc@Base 0.20.2 ts_external_scanner_state_copy@Base 0.19 ts_external_scanner_state_data@Base 0.19 ts_external_scanner_state_delete@Base 0.19 @@ -80,6 +84,7 @@ ts_query__step_is_fallible@Base 0.20.1 ts_query_capture_count@Base 0.19 ts_query_capture_name_for_id@Base 0.19 + ts_query_capture_quantifier_for_id@Base 0.20.3 ts_query_cursor__compare_captures@Base 0.19 ts_query_cursor__compare_nodes@Base 0.19 ts_query_cursor_delete@Base 0.19 @@ -105,6 +110,7 @@ ts_query_string_value_for_id@Base 0.19 ts_range_array_get_changed_ranges@Base 0.19 ts_range_array_intersects@Base 0.19 + ts_set_allocator@Base 0.20.2 ts_stack_can_merge@Base 0.19 ts_stack_clear@Base 0.19 ts_stack_copy_version@Base 0.19 @@ -117,7 +123,6 @@ ts_stack_is_active@Base 0.19 ts_stack_is_halted@Base 0.19 ts_stack_is_paused@Base 0.19 - ts_stack_iterate@Base 0.19 ts_stack_last_external_token@Base 0.19 ts_stack_merge@Base 0.19 ts_stack_new@Base 0.19 @@ -148,7 +153,6 @@ ts_subtree_clone@Base 0.19 ts_subtree_compare@Base 0.19 ts_subtree_edit@Base 0.19 - ts_subtree_eq@Base 0.19 ts_subtree_external_scanner_state_eq@Base 0.19 ts_subtree_get_changed_ranges@Base 0.19 ts_subtree_last_external_token@Base 0.19 diff -Nru tree-sitter-0.20.1/docs/assets/css/style.scss tree-sitter-0.20.3/docs/assets/css/style.scss --- tree-sitter-0.20.1/docs/assets/css/style.scss 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/docs/assets/css/style.scss 2022-01-22 00:36:45.000000000 +0000 @@ -185,4 +185,5 @@ .query-error { text-decoration: underline red dashed; + -webkit-text-decoration: underline red dashed; } diff -Nru tree-sitter-0.20.1/docs/index.md tree-sitter-0.20.3/docs/index.md --- tree-sitter-0.20.1/docs/index.md 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/docs/index.md 2022-01-22 00:36:45.000000000 +0000 @@ -24,6 +24,8 @@ * [Ruby](https://github.com/tree-sitter/ruby-tree-sitter) * [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) * [Swift](https://github.com/ChimeHQ/SwiftTreeSitter) +* [Kotlin](https://github.com/oxisto/kotlintree) +* [Java](https://github.com/serenadeai/java-tree-sitter) ### Available Parsers @@ -74,6 +76,7 @@ Parsers for these languages are in development: * [Agda](https://github.com/tree-sitter/tree-sitter-agda) +* [Elixir](https://github.com/elixir-lang/tree-sitter-elixir) * [Erlang](https://github.com/AbstractMachinesLab/tree-sitter-erlang/) * [Dockerfile](https://github.com/camdencheek/tree-sitter-dockerfile) * [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) @@ -84,10 +87,11 @@ * [Nix](https://github.com/cstrahan/tree-sitter-nix) * [Objective-C](https://github.com/jiyee/tree-sitter-objc) * [Perl](https://github.com/ganezdragon/tree-sitter-perl) +* [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) * [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) * [Swift](https://github.com/tree-sitter/tree-sitter-swift) -* [SQL](https://github.com/m-novikov/tree-sitter-sql) +* [SQL](https://github.com/m-novikov/tree-sitter-sql) ### Talks on Tree-sitter diff -Nru tree-sitter-0.20.1/docs/section-3-creating-parsers.md tree-sitter-0.20.3/docs/section-3-creating-parsers.md --- tree-sitter-0.20.1/docs/section-3-creating-parsers.md 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/docs/section-3-creating-parsers.md 2022-01-22 00:36:45.000000000 +0000 @@ -674,7 +674,7 @@ * **`TSSymbol result_symbol`** - The symbol that was recognized. Your scan function should *assign* to this field one of the values from the `TokenType` enum, described above. * **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace. * **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token. -* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of bytes (not characters) since the start of the current line. +* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line. * **`bool (*is_at_included_range_start)(TSLexer *)`** - A function for checking if the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. diff -Nru tree-sitter-0.20.1/highlight/src/lib.rs tree-sitter-0.20.3/highlight/src/lib.rs --- tree-sitter-0.20.1/highlight/src/lib.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/highlight/src/lib.rs 2022-01-22 00:36:45.000000000 +0000 @@ -835,6 +835,7 @@ // highlighting patterns that are disabled for local variables. if definition_highlight.is_some() || reference_highlight.is_some() { while layer.config.non_local_variable_patterns[match_.pattern_index] { + match_.remove(); if let Some((next_match, next_capture_index)) = layer.captures.peek() { let next_capture = next_match.captures[*next_capture_index]; if next_capture.node == capture.node { diff -Nru tree-sitter-0.20.1/lib/binding_rust/allocations.rs tree-sitter-0.20.3/lib/binding_rust/allocations.rs --- tree-sitter-0.20.1/lib/binding_rust/allocations.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_rust/allocations.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,120 +0,0 @@ -use spin::Mutex; -use std::{ - collections::HashMap, - os::raw::{c_ulong, c_void}, -}; - -#[derive(Debug, PartialEq, Eq, Hash)] -struct Allocation(*const c_void); -unsafe impl Send for Allocation {} -unsafe impl Sync for Allocation {} - -#[derive(Default)] -struct AllocationRecorder { - enabled: bool, - allocation_count: u64, - outstanding_allocations: HashMap, -} - -thread_local! { - static RECORDER: Mutex = Default::default(); -} - -extern "C" { - fn malloc(size: c_ulong) -> *mut c_void; - fn calloc(count: c_ulong, size: c_ulong) -> *mut c_void; - fn realloc(ptr: *mut c_void, size: c_ulong) -> *mut c_void; - fn free(ptr: *mut c_void); -} - -pub fn record(f: impl FnOnce() -> T) -> T { - RECORDER.with(|recorder| { - let mut recorder = recorder.lock(); - recorder.enabled = true; - recorder.allocation_count = 0; - recorder.outstanding_allocations.clear(); - }); - - let value = f(); - - let outstanding_allocation_indices = RECORDER.with(|recorder| { - let mut recorder = recorder.lock(); - recorder.enabled = false; - recorder.allocation_count = 0; - recorder - .outstanding_allocations - .drain() - .map(|e| e.1) - .collect::>() - }); - if !outstanding_allocation_indices.is_empty() { - panic!( - "Leaked allocation indices: {:?}", - outstanding_allocation_indices - ); - } - value -} - -fn record_alloc(ptr: *mut c_void) { - RECORDER.with(|recorder| { - let mut recorder = recorder.lock(); - if recorder.enabled { - let count = recorder.allocation_count; - recorder.allocation_count += 1; - recorder - .outstanding_allocations - .insert(Allocation(ptr), count); - } - }); -} - -fn record_dealloc(ptr: *mut c_void) { - RECORDER.with(|recorder| { - let mut recorder = recorder.lock(); - if recorder.enabled { - recorder.outstanding_allocations.remove(&Allocation(ptr)); - } - }); -} - -#[no_mangle] -pub extern "C" fn ts_record_malloc(size: c_ulong) -> *const c_void { - let result = unsafe { malloc(size) }; - record_alloc(result); - result -} - -#[no_mangle] -pub extern "C" fn ts_record_calloc(count: c_ulong, size: c_ulong) -> *const c_void { - let result = unsafe { calloc(count, size) }; - record_alloc(result); - result -} - -#[no_mangle] -pub extern "C" fn ts_record_realloc(ptr: *mut c_void, size: c_ulong) -> *const c_void { - record_dealloc(ptr); - let result = unsafe { realloc(ptr, size) }; - record_alloc(result); - result -} - -// This needs to be unsafe because it's reexported as crate::util::free_ptr, which is mapped to -// libc's `free` function when the allocation-tracking feature is disabled. Since `free` is -// unsafe, this function needs to be too. -#[no_mangle] -pub unsafe extern "C" fn ts_record_free(ptr: *mut c_void) { - record_dealloc(ptr); - free(ptr); -} - -#[no_mangle] -pub extern "C" fn ts_toggle_allocation_recording(enabled: bool) -> bool { - RECORDER.with(|recorder| { - let mut recorder = recorder.lock(); - let was_enabled = recorder.enabled; - recorder.enabled = enabled; - was_enabled - }) -} diff -Nru tree-sitter-0.20.1/lib/binding_rust/bindings.rs tree-sitter-0.20.3/lib/binding_rust/bindings.rs --- tree-sitter-0.20.1/lib/binding_rust/bindings.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_rust/bindings.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,5 @@ -/* automatically generated by rust-bindgen 0.59.1 */ +/* automatically generated by rust-bindgen 0.59.2 */ -pub type __darwin_size_t = ::std::os::raw::c_ulong; -pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; pub type TSFieldId = u16; #[repr(C)] @@ -31,11 +29,11 @@ } pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; -pub type TSInputEncoding = u32; +pub type TSInputEncoding = ::std::os::raw::c_uint; pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; -pub type TSSymbolType = u32; +pub type TSSymbolType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSPoint { @@ -66,7 +64,7 @@ } pub const TSLogType_TSLogTypeParse: TSLogType = 0; pub const TSLogType_TSLogTypeLex: TSLogType = 1; -pub type TSLogType = u32; +pub type TSLogType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLogger { @@ -109,6 +107,12 @@ pub node: TSNode, pub index: u32, } +pub const TSQuantifier_TSQuantifierZero: TSQuantifier = 0; +pub const TSQuantifier_TSQuantifierZeroOrOne: TSQuantifier = 1; +pub const TSQuantifier_TSQuantifierZeroOrMore: TSQuantifier = 2; +pub const TSQuantifier_TSQuantifierOne: TSQuantifier = 3; +pub const TSQuantifier_TSQuantifierOneOrMore: TSQuantifier = 4; +pub type TSQuantifier = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryMatch { @@ -120,7 +124,7 @@ pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; -pub type TSQueryPredicateStepType = u32; +pub type TSQueryPredicateStepType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryPredicateStep { @@ -134,7 +138,7 @@ pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6; -pub type TSQueryError = u32; +pub type TSQueryError = ::std::os::raw::c_uint; extern "C" { #[doc = " Create a new parser."] pub fn ts_parser_new() -> *mut TSParser; @@ -360,10 +364,6 @@ ) -> *mut TSRange; } extern "C" { - #[doc = " Write a DOT graph describing the syntax tree to the given file."] - pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); -} -extern "C" { #[doc = " Get the node's type as a null-terminated string."] pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; } @@ -672,6 +672,15 @@ ) -> *const ::std::os::raw::c_char; } extern "C" { + #[doc = " Get the quantifier of the query's captures. Each capture is * associated"] + #[doc = " with a numeric id based on the order that it appeared in the query's source."] + pub fn ts_query_capture_quantifier_for_id( + arg1: *const TSQuery, + pattern_id: u32, + capture_id: u32, + ) -> TSQuantifier; +} +extern "C" { pub fn ts_query_string_value_for_id( arg1: *const TSQuery, id: u32, @@ -829,6 +838,37 @@ #[doc = " See also `ts_parser_set_language`."] pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } +extern "C" { + #[doc = " Set the allocation functions used by the library."] + #[doc = ""] + #[doc = " By default, Tree-sitter uses the standard libc allocation functions,"] + #[doc = " but aborts the process when an allocation fails. This function lets"] + #[doc = " you supply alternative allocation functions at runtime."] + #[doc = ""] + #[doc = " If you pass `NULL` for any parameter, Tree-sitter will switch back to"] + #[doc = " its default implementation of that function."] + #[doc = ""] + #[doc = " If you call this function after the library has already been used, then"] + #[doc = " you must ensure that either:"] + #[doc = " 1. All the existing objects have been freed."] + #[doc = " 2. The new allocator shares its state with the old one, so it is capable"] + #[doc = " of freeing memory that was allocated by the old allocator."] + pub fn ts_set_allocator( + new_malloc: ::std::option::Option< + unsafe extern "C" fn(arg1: usize) -> *mut ::std::os::raw::c_void, + >, + new_calloc: ::std::option::Option< + unsafe extern "C" fn(arg1: usize, arg2: usize) -> *mut ::std::os::raw::c_void, + >, + new_realloc: ::std::option::Option< + unsafe extern "C" fn( + arg1: *mut ::std::os::raw::c_void, + arg2: usize, + ) -> *mut ::std::os::raw::c_void, + >, + new_free: ::std::option::Option, + ); +} -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 13; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 14; pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff -Nru tree-sitter-0.20.1/lib/binding_rust/build.rs tree-sitter-0.20.3/lib/binding_rust/build.rs --- tree-sitter-0.20.1/lib/binding_rust/build.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_rust/build.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,5 +1,3 @@ -extern crate cc; - use std::path::{Path, PathBuf}; use std::{env, fs}; @@ -19,13 +17,6 @@ } } - let mut config = cc::Build::new(); - - println!("cargo:rerun-if-env-changed=CARGO_FEATURE_ALLOCATION_TRACKING"); - if env::var("CARGO_FEATURE_ALLOCATION_TRACKING").is_ok() { - config.define("TREE_SITTER_ALLOCATION_TRACKING", ""); - } - let src_path = Path::new("src"); for entry in fs::read_dir(&src_path).unwrap() { let entry = entry.unwrap(); @@ -33,7 +24,7 @@ println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); } - config + cc::Build::new() .flag_if_supported("-std=c99") .flag_if_supported("-Wno-unused-parameter") .include(src_path) diff -Nru tree-sitter-0.20.1/lib/binding_rust/lib.rs tree-sitter-0.20.3/lib/binding_rust/lib.rs --- tree-sitter-0.20.1/lib/binding_rust/lib.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_rust/lib.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,9 +1,6 @@ mod ffi; mod util; -#[cfg(feature = "allocation-tracking")] -pub mod allocations; - #[cfg(unix)] use std::os::unix::io::AsRawFd; @@ -101,12 +98,36 @@ pub struct Query { ptr: NonNull, capture_names: Vec, + capture_quantifiers: Vec>, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, general_predicates: Vec>, } +/// A quantifier for captures +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum CaptureQuantifier { + Zero, + ZeroOrOne, + ZeroOrMore, + One, + OneOrMore, +} + +impl From for CaptureQuantifier { + fn from(value: ffi::TSQuantifier) -> Self { + match value { + ffi::TSQuantifier_TSQuantifierZero => CaptureQuantifier::Zero, + ffi::TSQuantifier_TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, + ffi::TSQuantifier_TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, + ffi::TSQuantifier_TSQuantifierOne => CaptureQuantifier::One, + ffi::TSQuantifier_TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, + _ => panic!("Unrecognized quantifier: {}", value), + } + } +} + /// A stateful object for executing a `Query` on a syntax `Tree`. pub struct QueryCursor { ptr: NonNull, @@ -1040,7 +1061,7 @@ .to_str() .unwrap() .to_string(); - unsafe { util::free_ptr(c_string as *mut c_void) }; + unsafe { (FREE_FN)(c_string as *mut c_void) }; result } @@ -1309,6 +1330,7 @@ let mut result = Query { ptr: unsafe { NonNull::new_unchecked(ptr) }, capture_names: Vec::with_capacity(capture_count as usize), + capture_quantifiers: Vec::with_capacity(pattern_count as usize), text_predicates: Vec::with_capacity(pattern_count), property_predicates: Vec::with_capacity(pattern_count), property_settings: Vec::with_capacity(pattern_count), @@ -1327,6 +1349,18 @@ } } + // Build a vector to store capture qunatifiers. + for i in 0..pattern_count { + let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); + for j in 0..capture_count { + unsafe { + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); + capture_quantifiers.push(quantifier.into()); + } + } + result.capture_quantifiers.push(capture_quantifiers); + } + // Build a vector of strings to represent literal values used in predicates. let string_values = (0..string_count) .map(|i| unsafe { @@ -1527,6 +1561,11 @@ &self.capture_names } + /// Get the quantifiers of the captures used in the query. + pub fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { + &self.capture_quantifiers[index] + } + /// Get the index for a given capture name. pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names @@ -2164,6 +2203,22 @@ } } +extern "C" { + fn free(ptr: *mut c_void); +} + +static mut FREE_FN: unsafe extern "C" fn(ptr: *mut c_void) = free; + +pub unsafe fn set_allocator( + new_malloc: Option *mut c_void>, + new_calloc: Option *mut c_void>, + new_realloc: Option *mut c_void>, + new_free: Option, +) { + FREE_FN = new_free.unwrap_or(free); + ffi::ts_set_allocator(new_malloc, new_calloc, new_realloc, new_free); +} + impl error::Error for IncludedRangesError {} impl error::Error for LanguageError {} impl error::Error for QueryError {} diff -Nru tree-sitter-0.20.1/lib/binding_rust/util.rs tree-sitter-0.20.3/lib/binding_rust/util.rs --- tree-sitter-0.20.1/lib/binding_rust/util.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_rust/util.rs 2022-01-22 00:36:45.000000000 +0000 @@ -1,19 +1,6 @@ +use super::FREE_FN; use std::os::raw::c_void; -#[cfg(not(feature = "allocation-tracking"))] -extern "C" { - /// Normally, use `free(1)` to free memory allocated from C. - #[link_name = "free"] - pub fn free_ptr(ptr: *mut c_void); -} - -/// When the `allocation-tracking` feature is enabled, the C library is compiled with -/// the `TREE_SITTER_TEST` macro, so all calls to `malloc`, `free`, etc are linked -/// against wrapper functions called `ts_record_malloc`, `ts_record_free`, etc. -/// When freeing buffers allocated from C, use the wrapper `free` function. -#[cfg(feature = "allocation-tracking")] -pub use crate::allocations::ts_record_free as free_ptr; - /// A raw pointer and a length, exposed as an iterator. pub struct CBufferIter { ptr: *mut T, @@ -50,8 +37,6 @@ impl Drop for CBufferIter { fn drop(&mut self) { - unsafe { - free_ptr(self.ptr as *mut c_void); - } + unsafe { (FREE_FN)(self.ptr as *mut c_void) }; } } diff -Nru tree-sitter-0.20.1/lib/binding_web/exports.json tree-sitter-0.20.3/lib/binding_web/exports.json --- tree-sitter-0.20.1/lib/binding_web/exports.json 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_web/exports.json 2022-01-22 00:36:45.000000000 +0000 @@ -2,6 +2,7 @@ "_calloc", "_free", "_malloc", + "_realloc", "__ZNKSt3__212basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4copyEPcmm", "__ZNKSt3__220__vector_base_commonILb1EE20__throw_length_errorEv", diff -Nru tree-sitter-0.20.1/lib/binding_web/package.json tree-sitter-0.20.3/lib/binding_web/package.json --- tree-sitter-0.20.1/lib/binding_web/package.json 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_web/package.json 2022-01-22 00:36:45.000000000 +0000 @@ -1,6 +1,6 @@ { "name": "web-tree-sitter", - "version": "0.20.1", + "version": "0.20.3", "description": "Tree-sitter bindings for the web", "main": "tree-sitter.js", "types": "tree-sitter-web.d.ts", diff -Nru tree-sitter-0.20.1/lib/binding_web/prefix.js tree-sitter-0.20.3/lib/binding_web/prefix.js --- tree-sitter-0.20.1/lib/binding_web/prefix.js 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/binding_web/prefix.js 2022-01-22 00:36:45.000000000 +0000 @@ -1,5 +1,9 @@ var TreeSitter = function() { var initPromise; + var document = typeof window == 'object' + ? {currentScript: window.document.currentScript} + : null; + class Parser { constructor() { this.initialize(); @@ -11,5 +15,5 @@ static init(moduleOptions) { if (initPromise) return initPromise; - Module = Object.assign({ }, Module, moduleOptions); + Module = Object.assign({}, Module, moduleOptions); return initPromise = new Promise((resolveInitPromise) => { diff -Nru tree-sitter-0.20.1/lib/Cargo.toml tree-sitter-0.20.3/lib/Cargo.toml --- tree-sitter-0.20.1/lib/Cargo.toml 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/Cargo.toml 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,7 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.20.1" +version = "0.20.3" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" @@ -22,17 +22,11 @@ ] [dependencies] -lazy_static = { version="1.2.0", optional=true } +lazy_static = { version = "1.2.0", optional = true } regex = "1" -spin = { version="0.7", optional=true } [build-dependencies] cc = "^1.0.58" [lib] path = "binding_rust/lib.rs" - -# This feature is only useful for testing the Tree-sitter library itself. -# It is exposed because all of Tree-sitter's tests live in the Tree-sitter CLI crate. -[features] -allocation-tracking = ["lazy_static", "spin"] diff -Nru tree-sitter-0.20.1/lib/include/tree_sitter/api.h tree-sitter-0.20.3/lib/include/tree_sitter/api.h --- tree-sitter-0.20.1/lib/include/tree_sitter/api.h 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/include/tree_sitter/api.h 2022-01-22 00:36:45.000000000 +0000 @@ -21,7 +21,7 @@ * The Tree-sitter library is generally backwards-compatible with languages * generated using older CLI versions, but is not forwards-compatible. */ -#define TREE_SITTER_LANGUAGE_VERSION 13 +#define TREE_SITTER_LANGUAGE_VERSION 14 /** * The earliest ABI version that is supported by the current version of the @@ -106,6 +106,14 @@ uint32_t index; } TSQueryCapture; +typedef enum { + TSQuantifierZero = 0, // must match the array initialization value + TSQuantifierZeroOrOne, + TSQuantifierZeroOrMore, + TSQuantifierOne, + TSQuantifierOneOrMore, +} TSQuantifier; + typedef struct { uint32_t id; uint16_t pattern_index; @@ -740,6 +748,17 @@ uint32_t id, uint32_t *length ); + +/** + * Get the quantifier of the query's captures. Each capture is * associated + * with a numeric id based on the order that it appeared in the query's source. + */ +TSQuantifier ts_query_capture_quantifier_for_id( + const TSQuery *, + uint32_t pattern_id, + uint32_t capture_id +); + const char *ts_query_string_value_for_id( const TSQuery *, uint32_t id, @@ -896,6 +915,33 @@ */ uint32_t ts_language_version(const TSLanguage *); +/**********************************/ +/* Section - Global Configuration */ +/**********************************/ + +/** + * Set the allocation functions used by the library. + * + * By default, Tree-sitter uses the standard libc allocation functions, + * but aborts the process when an allocation fails. This function lets + * you supply alternative allocation functions at runtime. + * + * If you pass `NULL` for any parameter, Tree-sitter will switch back to + * its default implementation of that function. + * + * If you call this function after the library has already been used, then + * you must ensure that either: + * 1. All the existing objects have been freed. + * 2. The new allocator shares its state with the old one, so it is capable + * of freeing memory that was allocated by the old allocator. + */ +void ts_set_allocator( + void *(*new_malloc)(size_t), + void *(*new_calloc)(size_t, size_t), + void *(*new_realloc)(void *, size_t), + void (*new_free)(void *) +); + #ifdef __cplusplus } #endif diff -Nru tree-sitter-0.20.1/lib/include/tree_sitter/parser.h tree-sitter-0.20.3/lib/include/tree_sitter/parser.h --- tree-sitter-0.20.1/lib/include/tree_sitter/parser.h 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/include/tree_sitter/parser.h 2022-01-22 00:36:45.000000000 +0000 @@ -123,6 +123,7 @@ unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; + const TSStateId *primary_state_ids; }; /* diff -Nru tree-sitter-0.20.1/lib/src/alloc.c tree-sitter-0.20.3/lib/src/alloc.c --- tree-sitter-0.20.1/lib/src/alloc.c 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/alloc.c 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,48 @@ +#include "alloc.h" +#include + +static void *ts_malloc_default(size_t size) { + void *result = malloc(size); + if (size > 0 && !result) { + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); + exit(1); + } + return result; +} + +static void *ts_calloc_default(size_t count, size_t size) { + void *result = calloc(count, size); + if (count > 0 && !result) { + fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); + exit(1); + } + return result; +} + +static void *ts_realloc_default(void *buffer, size_t size) { + void *result = realloc(buffer, size); + if (size > 0 && !result) { + fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); + exit(1); + } + return result; +} + +// Allow clients to override allocation functions dynamically +void *(*ts_current_malloc)(size_t) = ts_malloc_default; +void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; +void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; +void (*ts_current_free)(void *) = free; + +void ts_set_allocator( + void *(*new_malloc)(size_t), + void *(*new_calloc)(size_t, size_t), + void *(*new_realloc)(void *, size_t), + void (*new_free)(void *) +) { + ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; + ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; + ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; + ts_current_free = new_free ? new_free : free; +} + diff -Nru tree-sitter-0.20.1/lib/src/alloc.h tree-sitter-0.20.3/lib/src/alloc.h --- tree-sitter-0.20.1/lib/src/alloc.h 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/alloc.h 2022-01-22 00:36:45.000000000 +0000 @@ -1,6 +1,8 @@ #ifndef TREE_SITTER_ALLOC_H_ #define TREE_SITTER_ALLOC_H_ +#include "tree_sitter/api.h" + #ifdef __cplusplus extern "C" { #endif @@ -9,75 +11,23 @@ #include #include -#if defined(TREE_SITTER_ALLOCATION_TRACKING) - -void *ts_record_malloc(size_t); -void *ts_record_calloc(size_t, size_t); -void *ts_record_realloc(void *, size_t); -void ts_record_free(void *); -bool ts_toggle_allocation_recording(bool); - -#define ts_malloc ts_record_malloc -#define ts_calloc ts_record_calloc -#define ts_realloc ts_record_realloc -#define ts_free ts_record_free - -#else +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); // Allow clients to override allocation functions - #ifndef ts_malloc -#define ts_malloc ts_malloc_default +#define ts_malloc ts_current_malloc #endif #ifndef ts_calloc -#define ts_calloc ts_calloc_default +#define ts_calloc ts_current_calloc #endif #ifndef ts_realloc -#define ts_realloc ts_realloc_default +#define ts_realloc ts_current_realloc #endif #ifndef ts_free -#define ts_free ts_free_default -#endif - -#include - -static inline bool ts_toggle_allocation_recording(bool value) { - (void)value; - return false; -} - - -static inline void *ts_malloc_default(size_t size) { - void *result = malloc(size); - if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); - exit(1); - } - return result; -} - -static inline void *ts_calloc_default(size_t count, size_t size) { - void *result = calloc(count, size); - if (count > 0 && !result) { - fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); - exit(1); - } - return result; -} - -static inline void *ts_realloc_default(void *buffer, size_t size) { - void *result = realloc(buffer, size); - if (size > 0 && !result) { - fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); - exit(1); - } - return result; -} - -static inline void ts_free_default(void *buffer) { - free(buffer); -} - +#define ts_free ts_current_free #endif #ifdef __cplusplus diff -Nru tree-sitter-0.20.1/lib/src/get_changed_ranges.c tree-sitter-0.20.3/lib/src/get_changed_ranges.c --- tree-sitter-0.20.1/lib/src/get_changed_ranges.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/get_changed_ranges.c 2022-01-22 00:36:45.000000000 +0000 @@ -7,7 +7,11 @@ // #define DEBUG_GET_CHANGED_RANGES -static void ts_range_array_add(TSRangeArray *self, Length start, Length end) { +static void ts_range_array_add( + TSRangeArray *self, + Length start, + Length end +) { if (self->size > 0) { TSRange *last_range = array_back(self); if (start.bytes <= last_range->end_byte) { @@ -23,8 +27,12 @@ } } -bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, - uint32_t start_byte, uint32_t end_byte) { +bool ts_range_array_intersects( + const TSRangeArray *self, + unsigned start_index, + uint32_t start_byte, + uint32_t end_byte +) { for (unsigned i = start_index; i < self->size; i++) { TSRange *range = &self->contents[i]; if (range->end_byte > start_byte) { @@ -102,9 +110,13 @@ bool in_padding; } Iterator; -static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) { +static Iterator iterator_new( + TreeCursor *cursor, + const Subtree *tree, + const TSLanguage *language +) { array_clear(&cursor->stack); - array_push(&cursor->stack, ((TreeCursorEntry){ + array_push(&cursor->stack, ((TreeCursorEntry) { .subtree = tree, .position = length_zero(), .child_index = 0, @@ -210,7 +222,7 @@ Length child_right = length_add(child_left, ts_subtree_size(*child)); if (child_right.bytes > goal_position) { - array_push(&self->cursor.stack, ((TreeCursorEntry){ + array_push(&self->cursor.stack, ((TreeCursorEntry) { .subtree = child, .position = position, .child_index = i, @@ -262,7 +274,7 @@ if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; - array_push(&self->cursor.stack, ((TreeCursorEntry){ + array_push(&self->cursor.stack, ((TreeCursorEntry) { .subtree = next_child, .position = position, .child_index = child_index, @@ -289,7 +301,10 @@ IteratorMatches, } IteratorComparison; -static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) { +static IteratorComparison iterator_compare( + const Iterator *old_iter, + const Iterator *new_iter +) { Subtree old_tree = NULL_SUBTREE; Subtree new_tree = NULL_SUBTREE; uint32_t old_start = 0; @@ -339,11 +354,13 @@ } #endif -unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree, - TreeCursor *cursor1, TreeCursor *cursor2, - const TSLanguage *language, - const TSRangeArray *included_range_differences, - TSRange **ranges) { +unsigned ts_subtree_get_changed_ranges( + const Subtree *old_tree, const Subtree *new_tree, + TreeCursor *cursor1, TreeCursor *cursor2, + const TSLanguage *language, + const TSRangeArray *included_range_differences, + TSRange **ranges +) { TSRangeArray results = array_new(); Iterator old_iter = iterator_new(cursor1, old_tree, language); diff -Nru tree-sitter-0.20.1/lib/src/host.h tree-sitter-0.20.3/lib/src/host.h --- tree-sitter-0.20.1/lib/src/host.h 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/host.h 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,21 @@ + +// Determine endian and pointer size based on known defines. +// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments +// to override this. + +#if !defined(TS_BIG_ENDIAN) +#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ + || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) +#define TS_BIG_ENDIAN 1 +#else +#define TS_BIG_ENDIAN 0 +#endif +#endif + +#if !defined(TS_PTR_SIZE) +#if UINTPTR_MAX == 0xFFFFFFFF +#define TS_PTR_SIZE 32 +#else +#define TS_PTR_SIZE 64 +#endif +#endif diff -Nru tree-sitter-0.20.1/lib/src/language.c tree-sitter-0.20.3/lib/src/language.c --- tree-sitter-0.20.1/lib/src/language.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/language.c 2022-01-22 00:36:45.000000000 +0000 @@ -40,9 +40,9 @@ TSSymbol symbol ) { if (symbol == ts_builtin_sym_error) { - return (TSSymbolMetadata){.visible = true, .named = true}; + return (TSSymbolMetadata) {.visible = true, .named = true}; } else if (symbol == ts_builtin_sym_error_repeat) { - return (TSSymbolMetadata){.visible = false, .named = false}; + return (TSSymbolMetadata) {.visible = false, .named = false}; } else { return self->symbol_metadata[symbol]; } diff -Nru tree-sitter-0.20.1/lib/src/language.h tree-sitter-0.20.3/lib/src/language.h --- tree-sitter-0.20.1/lib/src/language.h 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/language.h 2022-01-22 00:36:45.000000000 +0000 @@ -200,6 +200,19 @@ } } +// Whether the state is a "primary state". If this returns false, it indicates that there exists +// another state that behaves identically to this one with respect to query analysis. +static inline bool ts_language_state_is_primary( + const TSLanguage *self, + TSStateId state +) { + if (self->version >= 14) { + return state == self->primary_state_ids[state]; + } else { + return true; + } +} + static inline const bool *ts_language_enabled_external_tokens( const TSLanguage *self, unsigned external_scanner_state diff -Nru tree-sitter-0.20.1/lib/src/lexer.c tree-sitter-0.20.3/lib/src/lexer.c --- tree-sitter-0.20.1/lib/src/lexer.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/lexer.c 2022-01-22 00:36:45.000000000 +0000 @@ -152,18 +152,8 @@ } } -// Advance to the next character in the source code, retrieving a new -// chunk of source code if needed. -static void ts_lexer__advance(TSLexer *_self, bool skip) { - Lexer *self = (Lexer *)_self; - if (!self->chunk) return; - - if (skip) { - LOG("skip", self->data.lookahead); - } else { - LOG("consume", self->data.lookahead); - } - +// Intended to be called only from functions that control logging. +static void ts_lexer__do_advance(Lexer *self, bool skip) { if (self->lookahead_size) { self->current_position.bytes += self->lookahead_size; if (self->data.lookahead == '\n') { @@ -205,6 +195,21 @@ } } +// Advance to the next character in the source code, retrieving a new +// chunk of source code if needed. +static void ts_lexer__advance(TSLexer *_self, bool skip) { + Lexer *self = (Lexer *)_self; + if (!self->chunk) return; + + if (skip) { + LOG("skip", self->data.lookahead); + } else { + LOG("consume", self->data.lookahead); + } + + ts_lexer__do_advance(self, skip); +} + // Mark that a token match has completed. This can be called multiple // times if a longer match is found later. static void ts_lexer__mark_end(TSLexer *_self) { @@ -233,8 +238,25 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; + + uint32_t goal_byte = self->current_position.bytes; + self->did_get_column = true; - return self->current_position.extent.column; + self->current_position.bytes -= self->current_position.extent.column; + self->current_position.extent.column = 0; + + if (self->current_position.bytes < self->chunk_start) { + ts_lexer__get_chunk(self); + } + + uint32_t result = 0; + ts_lexer__get_lookahead(self); + while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) { + ts_lexer__do_advance(self, false); + result++; + } + + return result; } // Is the lexer at a boundary between two disjoint included ranges of diff -Nru tree-sitter-0.20.1/lib/src/lib.c tree-sitter-0.20.3/lib/src/lib.c --- tree-sitter-0.20.1/lib/src/lib.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/lib.c 2022-01-22 00:36:45.000000000 +0000 @@ -5,6 +5,7 @@ #define _POSIX_C_SOURCE 200112L +#include "./alloc.c" #include "./get_changed_ranges.c" #include "./language.c" #include "./lexer.c" diff -Nru tree-sitter-0.20.1/lib/src/parser.c tree-sitter-0.20.3/lib/src/parser.c --- tree-sitter-0.20.1/lib/src/parser.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/parser.c 2022-01-22 00:36:45.000000000 +0000 @@ -1017,7 +1017,7 @@ break; case TSParseActionTypeReduce: if (action.reduce.child_count > 0) - ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ + ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { .symbol = action.reduce.symbol, .count = action.reduce.child_count, .dynamic_precedence = action.reduce.dynamic_precedence, diff -Nru tree-sitter-0.20.1/lib/src/query.c tree-sitter-0.20.3/lib/src/query.c --- tree-sitter-0.20.1/lib/src/query.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/query.c 2022-01-22 00:36:45.000000000 +0000 @@ -11,9 +11,10 @@ // #define DEBUG_EXECUTE_QUERY #define MAX_STEP_CAPTURE_COUNT 3 -#define MAX_STATE_PREDECESSOR_COUNT 100 -#define MAX_ANALYSIS_STATE_DEPTH 8 #define MAX_NEGATED_FIELD_COUNT 8 +#define MAX_STATE_PREDECESSOR_COUNT 256 +#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_ANALYSIS_ITERATION_COUNT 256 /* * Stream - A sequence of unicode characters derived from a UTF8 string. @@ -119,6 +120,11 @@ Array(Slice) slices; } SymbolTable; +/** + * CaptureQuantififers - a data structure holding the quantifiers of pattern captures. + */ +typedef Array(uint8_t) CaptureQuantifiers; + /* * PatternEntry - Information about the starting point for matching a particular * pattern. These entries are stored in a 'pattern map' - a sorted array that @@ -224,7 +230,9 @@ uint16_t step_index; } AnalysisState; -typedef Array(AnalysisState) AnalysisStateSet; +typedef Array(AnalysisState *) AnalysisStateSet; + +typedef Array(AnalysisState *) AnalysisStatePool; /* * AnalysisSubgraph - A subset of the states in the parse table that are used @@ -261,6 +269,7 @@ */ struct TSQuery { SymbolTable captures; + Array(CaptureQuantifiers) capture_quantifiers; SymbolTable predicate_values; Array(QueryStep) steps; Array(PatternEntry) pattern_map; @@ -453,6 +462,263 @@ } /************** + * Quantifiers + **************/ + +static TSQuantifier quantifier_mul( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierOne: + return right; + case TSQuantifierOneOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! +} + +static TSQuantifier quantifier_join( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case TSQuantifierZero: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZero; + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + }; + break; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierOne: + return TSQuantifierZeroOrOne; + break; + case TSQuantifierZeroOrMore: + case TSQuantifierOneOrMore: + return TSQuantifierZeroOrMore; + break; + }; + break; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + return TSQuantifierOne; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + switch (right) { + case TSQuantifierZero: + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! +} + +static TSQuantifier quantifier_add( + TSQuantifier left, + TSQuantifier right +) { + switch (left) + { + case TSQuantifierZero: + return right; + case TSQuantifierZeroOrOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZeroOrOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierZeroOrMore: + switch (right) { + case TSQuantifierZero: + return TSQuantifierZeroOrMore; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + return TSQuantifierZeroOrMore; + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOne: + switch (right) { + case TSQuantifierZero: + return TSQuantifierOne; + case TSQuantifierZeroOrOne: + case TSQuantifierZeroOrMore: + case TSQuantifierOne: + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + }; + break; + case TSQuantifierOneOrMore: + return TSQuantifierOneOrMore; + } + return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! +} + +// Create new capture quantifiers structure +static CaptureQuantifiers capture_quantifiers_new(void) { + return (CaptureQuantifiers) array_new(); +} + +// Delete capture quantifiers structure +static void capture_quantifiers_delete( + CaptureQuantifiers *self +) { + array_delete(self); +} + +// Clear capture quantifiers structure +static void capture_quantifiers_clear( + CaptureQuantifiers *self +) { + array_clear(self); +} + +// Replace capture quantifiers with the given quantifiers +static void capture_quantifiers_replace( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + array_clear(self); + array_push_all(self, quantifiers); +} + +// Return capture quantifier for the given capture id +static TSQuantifier capture_quantifier_for_id( + const CaptureQuantifiers *self, + uint16_t id +) { + return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id); +} + +// Add the given quantifier to the current value for id +static void capture_quantifiers_add_for_id( + CaptureQuantifiers *self, + uint16_t id, + TSQuantifier quantifier +) { + if (self->size <= id) { + array_grow_by(self, id + 1 - self->size); + } + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); +} + +// Point-wise add the given quantifiers to the current values +static void capture_quantifiers_add_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint16_t id = 0; id < quantifiers->size; id++) { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); + } +} + +// Join the given quantifier with the current values +static void capture_quantifiers_mul( + CaptureQuantifiers *self, + TSQuantifier quantifier +) { + for (uint16_t id = 0; id < self->size; id++) { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); + } +} + +// Point-wise join the quantifiers from a list of alternatives with the current values +static void capture_quantifiers_join_all( + CaptureQuantifiers *self, + CaptureQuantifiers *quantifiers +) { + if (self->size < quantifiers->size) { + array_grow_by(self, quantifiers->size - self->size); + } + for (uint32_t id = 0; id < quantifiers->size; id++) { + uint8_t *quantifier = array_get(quantifiers, id); + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); + } + for (uint32_t id = quantifiers->size; id < self->size; id++) { + uint8_t *own_quantifier = array_get(self, id); + *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); + } +} + +/************** * SymbolTable **************/ @@ -571,7 +837,7 @@ ) { return (StatePredecessorMap) { .contents = ts_calloc( - language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), + (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId) ), }; @@ -586,7 +852,7 @@ TSStateId state, TSStateId predecessor ) { - unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); TSStateId *count = &self->contents[index]; if ( *count == 0 || @@ -602,7 +868,7 @@ TSStateId state, unsigned *count ) { - unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); + size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); *count = self->contents[index]; return &self->contents[index + 1]; } @@ -626,34 +892,34 @@ } static inline int analysis_state__compare_position( - const AnalysisState *self, - const AnalysisState *other + AnalysisState *const *self, + AnalysisState *const *other ) { - for (unsigned i = 0; i < self->depth; i++) { - if (i >= other->depth) return -1; - if (self->stack[i].child_index < other->stack[i].child_index) return -1; - if (self->stack[i].child_index > other->stack[i].child_index) return 1; - } - if (self->depth < other->depth) return 1; + for (unsigned i = 0; i < (*self)->depth; i++) { + if (i >= (*other)->depth) return -1; + if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1; + if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1; + } + if ((*self)->depth < (*other)->depth) return 1; + if ((*self)->step_index < (*other)->step_index) return -1; + if ((*self)->step_index > (*other)->step_index) return 1; return 0; } static inline int analysis_state__compare( - const AnalysisState *self, - const AnalysisState *other + AnalysisState *const *self, + AnalysisState *const *other ) { int result = analysis_state__compare_position(self, other); if (result != 0) return result; - for (unsigned i = 0; i < self->depth; i++) { - if (self->stack[i].parent_symbol < other->stack[i].parent_symbol) return -1; - if (self->stack[i].parent_symbol > other->stack[i].parent_symbol) return 1; - if (self->stack[i].parse_state < other->stack[i].parse_state) return -1; - if (self->stack[i].parse_state > other->stack[i].parse_state) return 1; - if (self->stack[i].field_id < other->stack[i].field_id) return -1; - if (self->stack[i].field_id > other->stack[i].field_id) return 1; + for (unsigned i = 0; i < (*self)->depth; i++) { + if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1; + if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1; + if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1; + if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1; + if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1; + if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1; } - if (self->step_index < other->step_index) return -1; - if (self->step_index > other->step_index) return 1; return 0; } @@ -668,6 +934,84 @@ return false; } +static inline AnalysisState *analysis_state__clone(AnalysisState const *self) { + AnalysisState *new_state = ts_malloc(sizeof(AnalysisState)); + *new_state = *self; + return new_state; +} + +/**************** + * AnalysisStateSet + ****************/ + +// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by +// cloning one from scratch. +static inline AnalysisState *analysis_state_pool__clone_or_reuse( + AnalysisStatePool *self, + AnalysisState *borrowed_item +) { + AnalysisState *new_item; + if (self->size) { + new_item = array_pop(self); + *new_item = *borrowed_item; + } else { + new_item = analysis_state__clone(borrowed_item); + } + + return new_item; +} + +// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this +// set. The set does not contain duplicates, so if the item is already present, it will not be +// inserted, and no clone will be made. +// +// The caller retains ownership of the passed-in memory. However, the clone that is created by this +// function will be managed by the state set. +static inline void analysis_state_set__insert_sorted_by_clone( + AnalysisStateSet *self, + AnalysisStatePool *pool, + AnalysisState *borrowed_item +) { + unsigned index, exists; + array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); + if (!exists) { + AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_insert(self, index, new_item); + } +} + +// Inserts a clone of the passed-in item at the end position of this list. +// +// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function +// `analysis_state__compare`) than largest item already in this set. If items are inserted in the +// wrong order, the set will not function properly for future use. +// +// The caller retains ownership of the passed-in memory. However, the clone that is created by this +// function will be managed by the state set. +static inline void analysis_state_set__push_by_clone( + AnalysisStateSet *self, + AnalysisStatePool *pool, + AnalysisState *borrowed_item +) { + AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); + array_push(self, new_item); +} + +// Removes all items from this set, returning it to an empty state. +static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStatePool *pool) { + array_push_all(pool, self); + array_clear(self); +} + +// Releases all memory that is managed with this state set, including any items currently present. +// After calling this function, the set is no longer suitable for use. +static inline void analysis_state_set__delete(AnalysisStateSet *self) { + for (unsigned i = 0; i < self->size; i++) { + ts_free(self->contents[i]); + } + array_delete(self); +} + /*********************** * AnalysisSubgraphNode ***********************/ @@ -879,28 +1223,30 @@ if (lookahead_iterator.next_state != state) { state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); } - const TSSymbol *aliases, *aliases_end; - ts_language_aliases_for_symbol( - self->language, - lookahead_iterator.symbol, - &aliases, - &aliases_end - ); - for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { - array_search_sorted_by( - &subgraphs, - .symbol, - *symbol, - &subgraph_index, - &exists + if (ts_language_state_is_primary(self->language, state)) { + const TSSymbol *aliases, *aliases_end; + ts_language_aliases_for_symbol( + self->language, + lookahead_iterator.symbol, + &aliases, + &aliases_end ); - if (exists) { - AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - if ( - subgraph->start_states.size == 0 || - *array_back(&subgraph->start_states) != state - ) - array_push(&subgraph->start_states, state); + for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { + array_search_sorted_by( + &subgraphs, + .symbol, + *symbol, + &subgraph_index, + &exists + ); + if (exists) { + AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; + if ( + subgraph->start_states.size == 0 || + *array_back(&subgraph->start_states) != state + ) + array_push(&subgraph->start_states, state); + } } } } @@ -977,6 +1323,7 @@ AnalysisStateSet states = array_new(); AnalysisStateSet next_states = array_new(); AnalysisStateSet deeper_states = array_new(); + AnalysisStatePool state_pool = array_new(); Array(uint16_t) final_step_indices = array_new(); for (unsigned i = 0; i < parent_step_indices.size; i++) { uint16_t parent_step_index = parent_step_indices.contents[i]; @@ -1001,11 +1348,11 @@ // Initialize an analysis state at every parse state in the table where // this parent symbol can occur. AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index]; - array_clear(&states); - array_clear(&deeper_states); + analysis_state_set__clear(&states, &state_pool); + analysis_state_set__clear(&deeper_states, &state_pool); for (unsigned j = 0; j < subgraph->start_states.size; j++) { TSStateId parse_state = subgraph->start_states.contents[j]; - array_push(&states, ((AnalysisState) { + analysis_state_set__push_by_clone(&states, &state_pool, &((AnalysisState) { .step_index = parent_step_index + 1, .stack = { [0] = { @@ -1023,19 +1370,24 @@ // Walk the subgraph for this non-terminal, tracking all of the possible // sequences of progress within the pattern. bool can_finish_pattern = false; - bool did_exceed_max_depth = false; + bool did_abort_analysis = false; unsigned recursion_depth_limit = 0; unsigned prev_final_step_count = 0; array_clear(&final_step_indices); - for (;;) { + for (unsigned iteration = 0;; iteration++) { + if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { + did_abort_analysis = true; + break; + } + #ifdef DEBUG_ANALYZE_QUERY - printf("Final step indices:"); + printf("Iteration: %u. Final step indices:", iteration); for (unsigned j = 0; j < final_step_indices.size; j++) { printf(" %4u", final_step_indices.contents[j]); } printf("\nWalk states for %u %s:\n", i, ts_language_symbol_name(self->language, parent_symbol)); for (unsigned j = 0; j < states.size; j++) { - AnalysisState *state = &states.contents[j]; + AnalysisState *state = states.contents[j]; printf(" %3u: step: %u, stack: [", j, state->step_index); for (unsigned k = 0; k < state->depth; k++) { printf( @@ -1057,7 +1409,10 @@ // limit. But only allow this if progress has been made since the last time the depth // limit was increased. if (states.size == 0) { - if (deeper_states.size > 0 && final_step_indices.size > prev_final_step_count) { + if ( + deeper_states.size > 0 + && final_step_indices.size > prev_final_step_count + ) { #ifdef DEBUG_ANALYZE_QUERY printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); #endif @@ -1073,9 +1428,9 @@ break; } - array_clear(&next_states); + analysis_state_set__clear(&next_states, &state_pool); for (unsigned j = 0; j < states.size; j++) { - AnalysisState * const state = &states.contents[j]; + AnalysisState * const state = states.contents[j]; // For efficiency, it's important to avoid processing the same analysis state more // than once. To achieve this, keep the states in order of ascending position within @@ -1083,13 +1438,26 @@ // the states that have made the least progress. Avoid advancing states that have already // made more progress. if (next_states.size > 0) { - int comparison = analysis_state__compare_position(state, array_back(&next_states)); + int comparison = analysis_state__compare_position( + &state, + array_back(&next_states) + ); if (comparison == 0) { - array_insert_sorted_with(&next_states, analysis_state__compare, *state); + #ifdef DEBUG_ANALYZE_QUERY + printf("Skip iteration for state %u\n", j); + #endif + analysis_state_set__insert_sorted_by_clone(&next_states, &state_pool, state); continue; } else if (comparison > 0) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Terminate iteration at state %u\n", j); + #endif while (j < states.size) { - array_push(&next_states, states.contents[j]); + analysis_state_set__push_by_clone( + &next_states, + &state_pool, + states.contents[j] + ); j++; } break; @@ -1203,7 +1571,7 @@ printf("Exceeded depth limit for state %u\n", j); #endif - did_exceed_max_depth = true; + did_abort_analysis = true; continue; } @@ -1220,7 +1588,11 @@ }; if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { - array_insert_sorted_with(&deeper_states, analysis_state__compare, next_state); + analysis_state_set__insert_sorted_by_clone( + &deeper_states, + &state_pool, + &next_state + ); continue; } } @@ -1267,7 +1639,7 @@ if (did_finish_pattern || next_state.depth == 0) { array_insert_sorted_by(&final_step_indices, , next_state.step_index); } else { - array_insert_sorted_with(&next_states, analysis_state__compare, next_state); + analysis_state_set__insert_sorted_by_clone(&next_states, &state_pool, &next_state); } } @@ -1295,22 +1667,9 @@ next_states = _states; } - // Mark as indefinite any step where a match terminated. - // Later, this property will be propagated to all of the step's predecessors. - for (unsigned j = 0; j < final_step_indices.size; j++) { - uint32_t final_step_index = final_step_indices.contents[j]; - QueryStep *step = &self->steps.contents[final_step_index]; - if ( - step->depth != PATTERN_DONE_MARKER && - step->depth > parent_depth && - !step->is_dead_end - ) { - step->parent_pattern_guaranteed = false; - step->root_pattern_guaranteed = false; - } - } - - if (did_exceed_max_depth) { + // If this pattern could not be fully analyzed, then every step should + // be considered fallible. + if (did_abort_analysis) { for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { QueryStep *step = &self->steps.contents[j]; if ( @@ -1322,11 +1681,12 @@ step->root_pattern_guaranteed = false; } } + continue; } // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. - if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) { + if (!can_finish_pattern) { assert(final_step_indices.size > 0); uint16_t impossible_step_index = *array_back(&final_step_indices); uint32_t i, exists; @@ -1336,6 +1696,21 @@ all_patterns_are_valid = false; break; } + + // Mark as fallible any step where a match terminated. + // Later, this property will be propagated to all of the step's predecessors. + for (unsigned j = 0; j < final_step_indices.size; j++) { + uint32_t final_step_index = final_step_indices.contents[j]; + QueryStep *step = &self->steps.contents[final_step_index]; + if ( + step->depth != PATTERN_DONE_MARKER && + step->depth > parent_depth && + !step->is_dead_end + ) { + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; + } + } } // Mark as indefinite any step with captures that are used in predicates. @@ -1441,10 +1816,14 @@ array_delete(&subgraphs.contents[i].nodes); } array_delete(&subgraphs); + for (unsigned i = 0; i < state_pool.size; i++) { + ts_free(state_pool.contents[i]); + } + array_delete(&state_pool); array_delete(&next_nodes); - array_delete(&states); - array_delete(&next_states); - array_delete(&deeper_states); + analysis_state_set__delete(&states); + analysis_state_set__delete(&next_states); + analysis_state_set__delete(&deeper_states); array_delete(&final_step_indices); array_delete(&parent_step_indices); array_delete(&predicate_capture_ids); @@ -1665,11 +2044,15 @@ // Read one S-expression pattern from the stream, and incorporate it into // the query's internal state machine representation. For nested patterns, // this function calls itself recursively. +// +// The caller is repsonsible for passing in a dedicated CaptureQuantifiers. +// These should not be shared between different calls to ts_query__parse_pattern! static TSQueryError ts_query__parse_pattern( TSQuery *self, Stream *stream, uint32_t depth, - bool is_immediate + bool is_immediate, + CaptureQuantifiers *capture_quantifiers ) { if (stream->next == 0) return TSQueryErrorSyntax; if (stream->next == ')' || stream->next == ']') return PARENT_DONE; @@ -1694,13 +2077,15 @@ // Parse each branch, and add a placeholder step in between the branches. Array(uint32_t) branch_step_indices = array_new(); + CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); for (;;) { uint32_t start_index = self->steps.size; TSQueryError e = ts_query__parse_pattern( self, stream, depth, - is_immediate + is_immediate, + &branch_capture_quantifiers ); if (e == PARENT_DONE) { @@ -1711,12 +2096,20 @@ e = TSQueryErrorSyntax; } if (e) { + capture_quantifiers_delete(&branch_capture_quantifiers); array_delete(&branch_step_indices); return e; } + if(start_index == starting_step_index) { + capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); + } else { + capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); + } + array_push(&branch_step_indices, start_index); array_push(&self->steps, query_step__new(0, depth, false)); + capture_quantifiers_clear(&branch_capture_quantifiers); } (void)array_pop(&self->steps); @@ -1732,6 +2125,7 @@ end_step->is_dead_end = true; } + capture_quantifiers_delete(&branch_capture_quantifiers); array_delete(&branch_step_indices); } @@ -1746,6 +2140,7 @@ // If this parenthesis is followed by a node, then it represents a grouped sequence. if (stream->next == '(' || stream->next == '"' || stream->next == '[') { bool child_is_immediate = false; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); for (;;) { if (stream->next == '.') { child_is_immediate = true; @@ -1756,7 +2151,8 @@ self, stream, depth, - child_is_immediate + child_is_immediate, + &child_capture_quantifiers ); if (e == PARENT_DONE) { if (stream->next == ')') { @@ -1765,10 +2161,17 @@ } e = TSQueryErrorSyntax; } - if (e) return e; + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); } + capture_quantifiers_delete(&child_capture_quantifiers); } // A dot/pound character indicates the start of a predicate. @@ -1857,12 +2260,16 @@ uint16_t last_child_step_index = 0; uint16_t negated_field_count = 0; TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; + CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); for (;;) { // Parse a negated field assertion if (stream->next == '!') { stream_advance(stream); stream_skip_whitespace(stream); - if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + if (!stream_is_ident_start(stream)) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } const char *field_name = stream->input; stream_scan_identifier(stream); uint32_t length = stream->input - field_name; @@ -1875,6 +2282,7 @@ ); if (!field_id) { stream->input = field_name; + capture_quantifiers_delete(&child_capture_quantifiers); return TSQueryErrorField; } @@ -1899,12 +2307,16 @@ self, stream, depth + 1, - child_is_immediate + child_is_immediate, + &child_capture_quantifiers ); if (e == PARENT_DONE) { if (stream->next == ')') { if (child_is_immediate) { - if (last_child_step_index == 0) return TSQueryErrorSyntax; + if (last_child_step_index == 0) { + capture_quantifiers_delete(&child_capture_quantifiers); + return TSQueryErrorSyntax; + } self->steps.contents[last_child_step_index].is_last_child = true; } @@ -1922,11 +2334,18 @@ } e = TSQueryErrorSyntax; } - if (e) return e; + if (e) { + capture_quantifiers_delete(&child_capture_quantifiers); + return e; + } + + capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); last_child_step_index = step_index; child_is_immediate = false; + capture_quantifiers_clear(&child_capture_quantifiers); } + capture_quantifiers_delete(&child_capture_quantifiers); } } @@ -1975,14 +2394,19 @@ stream_skip_whitespace(stream); // Parse the pattern + CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); TSQueryError e = ts_query__parse_pattern( self, stream, depth, - is_immediate + is_immediate, + &field_capture_quantifiers ); - if (e == PARENT_DONE) return TSQueryErrorSyntax; - if (e) return e; + if (e) { + capture_quantifiers_delete(&field_capture_quantifiers); + if (e == PARENT_DONE) e = TSQueryErrorSyntax; + return e; + } // Add the field name to the first step of the pattern TSFieldId field_id = ts_language_field_id_for_name( @@ -2010,6 +2434,9 @@ break; } } + + capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); + capture_quantifiers_delete(&field_capture_quantifiers); } else { @@ -2019,9 +2446,12 @@ stream_skip_whitespace(stream); // Parse suffixes modifiers for this pattern + TSQuantifier quantifier = TSQuantifierOne; for (;;) { // Parse the one-or-more operator. if (stream->next == '+') { + quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); + stream_advance(stream); stream_skip_whitespace(stream); @@ -2034,6 +2464,8 @@ // Parse the zero-or-more repetition operator. else if (stream->next == '*') { + quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); + stream_advance(stream); stream_skip_whitespace(stream); @@ -2052,6 +2484,8 @@ // Parse the optional operator. else if (stream->next == '?') { + quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); + stream_advance(stream); stream_skip_whitespace(stream); @@ -2078,6 +2512,9 @@ length ); + // Add the capture quantifier + capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); + uint32_t step_index = starting_step_index; for (;;) { QueryStep *step = &self->steps.contents[step_index]; @@ -2101,6 +2538,8 @@ } } + capture_quantifiers_mul(capture_quantifiers, quantifier); + return 0; } @@ -2125,6 +2564,7 @@ .steps = array_new(), .pattern_map = array_new(), .captures = symbol_table_new(), + .capture_quantifiers = array_new(), .predicate_values = symbol_table_new(), .predicate_steps = array_new(), .patterns = array_new(), @@ -2149,7 +2589,8 @@ .predicate_steps = (Slice) {.offset = start_predicate_step_index}, .start_byte = stream_offset(&stream), })); - *error_type = ts_query__parse_pattern(self, &stream, 0, false); + CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); + *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); QueryPattern *pattern = array_back(&self->patterns); @@ -2161,10 +2602,14 @@ if (*error_type) { if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; *error_offset = stream_offset(&stream); + capture_quantifiers_delete(&capture_quantifiers); ts_query_delete(self); return NULL; } + // Maintain a list of capture quantifiers for each pattern + array_push(&self->capture_quantifiers, capture_quantifiers); + // Maintain a map that can look up patterns for a given root symbol. uint16_t wildcard_root_alternative_index = NONE; for (;;) { @@ -2240,6 +2685,11 @@ array_delete(&self->negated_fields); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); + for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); + capture_quantifiers_delete(capture_quantifiers); + } + array_delete(&self->capture_quantifiers); ts_free(self); } } @@ -2264,6 +2714,15 @@ return symbol_table_name_for_id(&self->captures, index, length); } +TSQuantifier ts_query_capture_quantifier_for_id( + const TSQuery *self, + uint32_t pattern_index, + uint32_t capture_index +) { + CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); + return capture_quantifier_for_id(capture_quantifiers, capture_index); +} + const char *ts_query_string_value_for_id( const TSQuery *self, uint32_t index, @@ -3268,6 +3727,20 @@ return; } } + + // Remove unfinished query states as well to prevent future + // captures for a match being removed. + for (unsigned i = 0; i < self->states.size; i++) { + const QueryState *state = &self->states.contents[i]; + if (state->id == match_id) { + capture_list_pool_release( + &self->capture_list_pool, + state->capture_list_id + ); + array_erase(&self->states, i); + return; + } + } } bool ts_query_cursor_next_capture( diff -Nru tree-sitter-0.20.1/lib/src/stack.c tree-sitter-0.20.3/lib/src/stack.c --- tree-sitter-0.20.1/lib/src/stack.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/stack.c 2022-01-22 00:36:45.000000000 +0000 @@ -43,11 +43,6 @@ bool is_pending; } StackIterator; -typedef struct { - void *payload; - StackIterateCallback callback; -} StackIterateSession; - typedef Array(StackNode *) StackNodeArray; typedef enum { @@ -91,7 +86,11 @@ assert(self->ref_count != 0); } -static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) { +static void stack_node_release( + StackNode *self, + StackNodeArray *pool, + SubtreePool *subtree_pool +) { recur: assert(self->ref_count != 0); self->ref_count--; @@ -121,16 +120,25 @@ } } -static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree, - bool is_pending, TSStateId state, StackNodeArray *pool) { - StackNode *node = pool->size > 0 ? - array_pop(pool) : - ts_malloc(sizeof(StackNode)); - *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state}; +static StackNode *stack_node_new( + StackNode *previous_node, + Subtree subtree, + bool is_pending, + TSStateId state, + StackNodeArray *pool +) { + StackNode *node = pool->size > 0 + ? array_pop(pool) + : ts_malloc(sizeof(StackNode)); + *node = (StackNode) { + .ref_count = 1, + .link_count = 0, + .state = state + }; if (previous_node) { node->link_count = 1; - node->links[0] = (StackLink){ + node->links[0] = (StackLink) { .node = previous_node, .subtree = subtree, .is_pending = is_pending, @@ -156,19 +164,29 @@ } static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { - return - left.ptr == right.ptr || - (left.ptr && right.ptr && - ts_subtree_symbol(left) == ts_subtree_symbol(right) && - ((ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) || - (ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && - ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && - ts_subtree_child_count(left) == ts_subtree_child_count(right) && - ts_subtree_extra(left) == ts_subtree_extra(right) && - ts_subtree_external_scanner_state_eq(left, right)))); + if (left.ptr == right.ptr) return true; + if (!left.ptr || !right.ptr) return false; + + // Symbols must match + if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; + + // If both have errors, don't bother keeping both. + if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; + + return ( + ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && + ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && + ts_subtree_child_count(left) == ts_subtree_child_count(right) && + ts_subtree_extra(left) == ts_subtree_extra(right) && + ts_subtree_external_scanner_state_eq(left, right) + ); } -static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) { +static void stack_node_add_link( + StackNode *self, + StackLink link, + SubtreePool *subtree_pool +) { if (link.node == self) return; for (int i = 0; i < self->link_count; i++) { @@ -193,8 +211,10 @@ } // If the previous nodes are mergeable, merge them recursively. - if (existing_link->node->state == link.node->state && - existing_link->node->position.bytes == link.node->position.bytes) { + if ( + existing_link->node->state == link.node->state && + existing_link->node->position.bytes == link.node->position.bytes + ) { for (int j = 0; j < link.node->link_count; j++) { stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); } @@ -227,7 +247,11 @@ if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; } -static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) { +static void stack_head_delete( + StackHead *self, + StackNodeArray *pool, + SubtreePool *subtree_pool +) { if (self->node) { if (self->last_external_token.ptr) { ts_subtree_release(subtree_pool, self->last_external_token); @@ -240,8 +264,11 @@ } } -static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version, - StackNode *node) { +static StackVersion ts_stack__add_version( + Stack *self, + StackVersion original_version, + StackNode *node +) { StackHead head = { .node = node, .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error, @@ -255,8 +282,12 @@ return (StackVersion)(self->heads.size - 1); } -static void ts_stack__add_slice(Stack *self, StackVersion original_version, - StackNode *node, SubtreeArray *subtrees) { +static void ts_stack__add_slice( + Stack *self, + StackVersion original_version, + StackNode *node, + SubtreeArray *subtrees +) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { StackVersion version = self->slices.contents[i].version; if (self->heads.contents[version].node == node) { @@ -271,9 +302,13 @@ array_push(&self->slices, slice); } -inline StackSliceArray stack__iter(Stack *self, StackVersion version, - StackCallback callback, void *payload, - int goal_subtree_count) { +inline StackSliceArray stack__iter( + Stack *self, + StackVersion version, + StackCallback callback, + void *payload, + int goal_subtree_count +) { array_clear(&self->slices); array_clear(&self->iterators); @@ -317,8 +352,9 @@ } if (should_stop) { - if (!should_pop) + if (!should_pop) { ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); + } array_erase(&self->iterators, i); i--, size--; continue; @@ -443,30 +479,19 @@ return head->node->node_count - head->node_count_at_last_error; } -void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, - bool pending, TSStateId state) { +void ts_stack_push( + Stack *self, + StackVersion version, + Subtree subtree, + bool pending, + TSStateId state +) { StackHead *head = array_get(&self->heads, version); StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; head->node = new_node; } -inline StackAction iterate_callback(void *payload, const StackIterator *iterator) { - StackIterateSession *session = payload; - session->callback( - session->payload, - iterator->node->state, - iterator->subtree_count - ); - return StackActionNone; -} - -void ts_stack_iterate(Stack *self, StackVersion version, - StackIterateCallback callback, void *payload) { - StackIterateSession session = {payload, callback}; - stack__iter(self, version, iterate_callback, &session, -1); -} - inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { unsigned *goal_subtree_count = payload; if (iterator->subtree_count == *goal_subtree_count) { @@ -530,7 +555,7 @@ break; } } - return (SubtreeArray){.size = 0}; + return (SubtreeArray) {.size = 0}; } inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { @@ -557,7 +582,7 @@ if (entry.depth < depth) break; if (entry.depth == depth && entry.state == state) return StackActionNone; } - array_push(session->summary, ((StackSummaryEntry){ + array_push(session->summary, ((StackSummaryEntry) { .position = iterator->node->position, .depth = depth, .state = state, @@ -712,7 +737,7 @@ stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); } array_clear(&self->heads); - array_push(&self->heads, ((StackHead){ + array_push(&self->heads, ((StackHead) { .node = self->base_node, .last_external_token = NULL_SUBTREE, .status = StackStatusActive, @@ -722,7 +747,6 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { array_reserve(&self->iterators, 32); - bool was_recording_allocations = ts_toggle_allocation_recording(false); if (!f) f = stderr; fprintf(f, "digraph stack {\n"); @@ -761,7 +785,9 @@ } fprintf(f, "\"]\n"); - array_push(&self->iterators, ((StackIterator){.node = head->node })); + array_push(&self->iterators, ((StackIterator) { + .node = head->node + })); } bool all_iterators_done = false; @@ -851,7 +877,6 @@ fprintf(f, "}\n"); array_delete(&visited_nodes); - ts_toggle_allocation_recording(was_recording_allocations); return true; } diff -Nru tree-sitter-0.20.1/lib/src/stack.h tree-sitter-0.20.3/lib/src/stack.h --- tree-sitter-0.20.1/lib/src/stack.h 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/stack.h 2022-01-22 00:36:45.000000000 +0000 @@ -126,8 +126,6 @@ typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t); -void ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *); - #ifdef __cplusplus } #endif diff -Nru tree-sitter-0.20.1/lib/src/subtree.c tree-sitter-0.20.3/lib/src/subtree.c --- tree-sitter-0.20.1/lib/src/subtree.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/subtree.c 2022-01-22 00:36:45.000000000 +0000 @@ -409,14 +409,19 @@ self.ptr->padding.bytes + self.ptr->size.bytes + ts_subtree_lookahead_bytes(child); - if (child_lookahead_end_byte > lookahead_end_byte) lookahead_end_byte = child_lookahead_end_byte; + if (child_lookahead_end_byte > lookahead_end_byte) { + lookahead_end_byte = child_lookahead_end_byte; + } if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { self.ptr->error_cost += ts_subtree_error_cost(child); } uint32_t grandchild_count = ts_subtree_child_count(child); - if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) { + if ( + self.ptr->symbol == ts_builtin_sym_error || + self.ptr->symbol == ts_builtin_sym_error_repeat + ) { if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { if (ts_subtree_visible(child)) { self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; @@ -454,7 +459,10 @@ self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; - if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) { + if ( + self.ptr->symbol == ts_builtin_sym_error || + self.ptr->symbol == ts_builtin_sym_error_repeat + ) { self.ptr->error_cost += ERROR_COST_PER_RECOVERY + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + @@ -603,37 +611,6 @@ } } -bool ts_subtree_eq(Subtree self, Subtree other) { - if (self.data.is_inline || other.data.is_inline) { - return memcmp(&self, &other, sizeof(SubtreeInlineData)) == 0; - } - - if (self.ptr) { - if (!other.ptr) return false; - } else { - return !other.ptr; - } - - if (self.ptr->symbol != other.ptr->symbol) return false; - if (self.ptr->visible != other.ptr->visible) return false; - if (self.ptr->named != other.ptr->named) return false; - if (self.ptr->padding.bytes != other.ptr->padding.bytes) return false; - if (self.ptr->size.bytes != other.ptr->size.bytes) return false; - if (self.ptr->symbol == ts_builtin_sym_error) return self.ptr->lookahead_char == other.ptr->lookahead_char; - if (self.ptr->child_count != other.ptr->child_count) return false; - if (self.ptr->child_count > 0) { - if (self.ptr->visible_child_count != other.ptr->visible_child_count) return false; - if (self.ptr->named_child_count != other.ptr->named_child_count) return false; - - for (uint32_t i = 0; i < self.ptr->child_count; i++) { - if (!ts_subtree_eq(ts_subtree_children(self)[i], ts_subtree_children(other)[i])) { - return false; - } - } - } - return true; -} - int ts_subtree_compare(Subtree left, Subtree right) { if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) return -1; if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) return 1; @@ -874,7 +851,7 @@ if (!self.ptr) return snprintf(string, limit, "(NULL)"); char *cursor = string; - char **writer = (limit > 0) ? &cursor : &string; + char **writer = (limit > 1) ? &cursor : &string; bool is_root = field_name == ROOT_FIELD; bool is_visible = include_all || @@ -973,7 +950,7 @@ ) { char scratch_string[1]; size_t size = ts_subtree__write_to_string( - self, scratch_string, 0, + self, scratch_string, 1, language, include_all, 0, false, ROOT_FIELD ) + 1; diff -Nru tree-sitter-0.20.1/lib/src/subtree.h tree-sitter-0.20.3/lib/src/subtree.h --- tree-sitter-0.20.1/lib/src/subtree.h 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/subtree.h 2022-01-22 00:36:45.000000000 +0000 @@ -11,6 +11,7 @@ #include "./length.h" #include "./array.h" #include "./error_costs.h" +#include "./host.h" #include "tree_sitter/api.h" #include "tree_sitter/parser.h" @@ -39,28 +40,74 @@ // // This representation is used for small leaf nodes that are not // errors, and were not created by an external scanner. -typedef struct { - bool is_inline : 1; - bool visible : 1; - bool named : 1; - bool extra : 1; - bool has_changes : 1; - bool is_missing : 1; +// +// The idea behind the layout of this struct is that the `is_inline` +// bit will fall exactly into the same location as the least significant +// bit of the pointer in `Subtree` or `MutableSubtree`, respectively. +// Because of alignment, for any valid pointer this will be 0, giving +// us the opportunity to make use of this bit to signify whether to use +// the pointer or the inline struct. +typedef struct SubtreeInlineData SubtreeInlineData; + +#define SUBTREE_BITS \ + bool visible : 1; \ + bool named : 1; \ + bool extra : 1; \ + bool has_changes : 1; \ + bool is_missing : 1; \ bool is_keyword : 1; - uint8_t symbol; - uint8_t padding_bytes; + +#define SUBTREE_SIZE \ + uint8_t padding_columns; \ + uint8_t padding_rows : 4; \ + uint8_t lookahead_bytes : 4; \ + uint8_t padding_bytes; \ uint8_t size_bytes; - uint8_t padding_columns; - uint8_t padding_rows : 4; - uint8_t lookahead_bytes : 4; + +#if TS_BIG_ENDIAN +#if TS_PTR_SIZE == 32 + +struct SubtreeInlineData { + uint16_t parse_state; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; + SUBTREE_SIZE +}; + +#else + +struct SubtreeInlineData { + SUBTREE_SIZE uint16_t parse_state; -} SubtreeInlineData; + uint8_t symbol; + SUBTREE_BITS + bool unused : 1; + bool is_inline : 1; +}; + +#endif +#else + +struct SubtreeInlineData { + bool is_inline : 1; + SUBTREE_BITS + uint8_t symbol; + uint16_t parse_state; + SUBTREE_SIZE +}; + +#endif + +#undef SUBTREE_BITS +#undef SUBTREE_SIZE // A heap-allocated representation of a subtree. // // This representation is used for parent nodes, external tokens, // errors, and other leaf nodes whose data is too large to fit into -// the inlinen representation. +// the inline representation. typedef struct { volatile uint32_t ref_count; Length padding; @@ -150,7 +197,6 @@ MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); void ts_subtree_retain(Subtree); void ts_subtree_release(SubtreePool *, Subtree); -bool ts_subtree_eq(Subtree, Subtree); int ts_subtree_compare(Subtree, Subtree); void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); diff -Nru tree-sitter-0.20.1/lib/src/tree_cursor.c tree-sitter-0.20.3/lib/src/tree_cursor.c --- tree-sitter-0.20.1/lib/src/tree_cursor.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/lib/src/tree_cursor.c 2022-01-22 00:36:45.000000000 +0000 @@ -34,9 +34,11 @@ }; } -static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, - TreeCursorEntry *result, - bool *visible) { +static inline bool ts_tree_cursor_child_iterator_next( + CursorChildIterator *self, + TreeCursorEntry *result, + bool *visible +) { if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; *result = (TreeCursorEntry) { diff -Nru tree-sitter-0.20.1/script/generate-bindings tree-sitter-0.20.3/script/generate-bindings --- tree-sitter-0.20.1/script/generate-bindings 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/script/generate-bindings 2022-01-22 00:36:45.000000000 +0000 @@ -3,14 +3,15 @@ output_path=lib/binding_rust/bindings.rs header_path='lib/include/tree_sitter/api.h' -bindgen \ - --no-layout-tests \ - --whitelist-type '^TS.*' \ - --whitelist-function '^ts_.*' \ - --opaque-type FILE \ - --size_t-is-usize \ - --translate-enum-integer-types \ - --distrust-clang-mangling \ +bindgen \ + --no-layout-tests \ + --whitelist-type '^TS.*' \ + --whitelist-function '^ts_.*' \ + --opaque-type FILE \ + --blocklist-type FILE \ + --blocklist-type '^__.*' \ + --blocklist-function ts_tree_print_dot_graph \ + --size_t-is-usize \ $header_path > $output_path echo "" >> $output_path diff -Nru tree-sitter-0.20.1/script/generate-fixtures tree-sitter-0.20.3/script/generate-fixtures --- tree-sitter-0.20.1/script/generate-fixtures 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/script/generate-fixtures 2022-01-22 00:36:45.000000000 +0000 @@ -22,6 +22,6 @@ echo "Regenerating ${grammar_name} parser" ( cd $grammar_dir - "$tree_sitter" generate src/grammar.json --no-bindings + "$tree_sitter" generate src/grammar.json --no-bindings --abi=latest ) done <<< "$grammar_files" diff -Nru tree-sitter-0.20.1/script/generate-fixtures.cmd tree-sitter-0.20.3/script/generate-fixtures.cmd --- tree-sitter-0.20.1/script/generate-fixtures.cmd 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/script/generate-fixtures.cmd 2022-01-22 00:36:45.000000000 +0000 @@ -6,7 +6,7 @@ for /f "tokens=*" %%f in ('dir test\fixtures\grammars\grammar.js /b/s') do ( pushd "%%f\.." echo Regenerating parser !cd! - %tree_sitter% generate src\grammar.json --no-bindings + %tree_sitter% generate src\grammar.json --no-bindings --abi=latest popd ) diff -Nru tree-sitter-0.20.1/tags/Cargo.toml tree-sitter-0.20.3/tags/Cargo.toml --- tree-sitter-0.20.1/tags/Cargo.toml 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/tags/Cargo.toml 2022-01-22 00:36:45.000000000 +0000 @@ -1,7 +1,7 @@ [package] name = "tree-sitter-tags" description = "Library for extracting tag information" -version = "0.20.0" +version = "0.20.1" authors = [ "Max Brunsfeld ", "Patrick Thomson ", diff -Nru tree-sitter-0.20.1/tags/src/lib.rs tree-sitter-0.20.3/tags/src/lib.rs --- tree-sitter-0.20.1/tags/src/lib.rs 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/tags/src/lib.rs 2022-01-22 00:36:45.000000000 +0000 @@ -82,7 +82,6 @@ #[derive(Debug)] struct LocalDef<'a> { name: &'a [u8], - value_range: Range, } #[derive(Debug)] @@ -349,7 +348,6 @@ }) { scope.local_defs.push(LocalDef { name: &self.source[range.clone()], - value_range: range, }); } } diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/external_tokens/scanner.c tree-sitter-0.20.3/test/fixtures/test_grammars/external_tokens/scanner.c --- tree-sitter-0.20.1/test/fixtures/test_grammars/external_tokens/scanner.c 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/external_tokens/scanner.c 2022-01-22 00:36:45.000000000 +0000 @@ -14,7 +14,7 @@ void *tree_sitter_external_tokens_external_scanner_create() { Scanner *scanner = malloc(sizeof(Scanner)); - *scanner = (Scanner){ + *scanner = (Scanner) { .open_delimiter = 0, .close_delimiter = 0, .depth = 0 diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt --- tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,93 @@ +======================== +Single list, no boxes +======================== + +- +- +- + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) +) + +======================== +Two lists, no boxes +======================== + + - + - + - + - + - + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) + (list + (list_item) + (list_item) + ) +) + +======================== +List with boxes +======================== + + - +□- + - + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) +) + +======================== +Multiple lists with boxes +======================== + + - +□ □- + □ - +□□□□□□- +□ □ □ - + - +□□□ - +□□□- +□ □- + +---------------------- + +(expression + (list + (list_item) + (list_item) + (list_item) + ) + (list + (list_item) + (list_item) + (list_item) + (list_item) + ) + (list + (list_item) + (list_item) + ) +) diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js --- tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,17 @@ +module.exports = grammar({ + name: "external_unicode_column_alignment", + + externals: $ => [ + $._start_list, + $.list_item, + $._end_list + ], + + extras: $ => [/\s/, '□'], + + rules: { + expression: $ => repeat($.list), + + list: $ => seq($._start_list, repeat1($.list_item), $._end_list) + } +}) diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/README.md tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/README.md --- tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/README.md 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/README.md 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1 @@ +This tests that `get_column` correctly counts codepoints since start of line. \ No newline at end of file diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c --- tree-sitter-0.20.1/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,85 @@ +#include +#include +#include + +enum { + LIST_START, + LIST_ITEM, + LIST_END +}; + +typedef struct { + int32_t column; +} Scanner; + +void *tree_sitter_external_unicode_column_alignment_external_scanner_create() { + Scanner *scanner = malloc(sizeof(Scanner)); + *scanner = (Scanner){ + .column = -1 + }; + return scanner; +} + +void tree_sitter_external_unicode_column_alignment_external_scanner_destroy(void *payload) { + free(payload); +} + +unsigned tree_sitter_external_unicode_column_alignment_external_scanner_serialize( + void *payload, + char *buffer +) { + Scanner *scanner = payload; + unsigned copied = sizeof(int32_t); + memcpy(buffer, &(scanner->column), copied); + return copied; +} + +void tree_sitter_external_unicode_column_alignment_external_scanner_deserialize( + void *payload, + const char *buffer, + unsigned length +) { + Scanner *scanner = payload; + scanner->column = -1; + if (length > 0) { + memcpy(&(scanner->column), buffer, sizeof(int32_t)); + } +} + +bool tree_sitter_external_unicode_column_alignment_external_scanner_scan( + void *payload, + TSLexer *lexer, + const bool *whitelist +) { + Scanner *scanner = payload; + // U+25A1 is unicode codepoint □ + while (iswspace(lexer->lookahead) || 0x25A1 == lexer->lookahead) { + lexer->advance(lexer, true); + } + if ('-' == lexer->lookahead) { + const int32_t column = lexer->get_column(lexer); + if (-1 == scanner->column) { + lexer->result_symbol = LIST_START; + scanner->column = column; + return true; + } else { + if (column == scanner->column) { + lexer->result_symbol = LIST_ITEM; + lexer->advance(lexer, false); + return true; + } else { + lexer->result_symbol = LIST_END; + scanner->column = -1; + return true; + } + } + } + + if (lexer->eof(lexer) && -1 != scanner->column) { + lexer->result_symbol = LIST_END; + scanner->column = -1; + return true; + } + + return false; +} diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/inlined_aliased_rules/grammar.js tree-sitter-0.20.3/test/fixtures/test_grammars/inlined_aliased_rules/grammar.js --- tree-sitter-0.20.1/test/fixtures/test_grammars/inlined_aliased_rules/grammar.js 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/inlined_aliased_rules/grammar.js 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,28 @@ +module.exports = grammar({ + name: "inlined_aliased_rules", + + extras: $ => [/\s/], + + inline: $ => [$.expression], + + rules: { + statement: $ => seq($.expression, ";"), + + expression: $ => + choice( + $.call_expression, + $.member_expression, + alias($.identifier, $.variable_name), + ), + + call_expression: $ => prec.left(seq($.expression, "(", $.expression, ")")), + + member_expression: $ => + prec.left( + 1, + seq($.expression, ".", alias($.identifier, $.property_name)), + ), + + identifier: $ => /[a-z]+/, + }, +}); diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json tree-sitter-0.20.3/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json --- tree-sitter-0.20.1/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/inlined_aliased_rules/grammar.json 1970-01-01 00:00:00.000000000 +0000 @@ -1,75 +0,0 @@ -{ - "name": "inlined_aliased_rules", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "inline": [ - "expression" - ], - - "rules": { - "statement": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ";"} - ] - }, - - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "call_expression"}, - {"type": "SYMBOL", "name": "member_expression"}, - { - "type": "ALIAS", - "value": "variable_name", - "named": true, - "content": { - "type": "SYMBOL", - "name": "identifier" - } - } - ] - }, - - "call_expression": { - "type": "PREC_LEFT", - "value": 0, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "("}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ")"} - ] - } - }, - - "member_expression": { - "type": "PREC_LEFT", - "value": 1, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "."}, - { - "type": "ALIAS", - "named": true, - "value": "property_name", - "content": { - "type": "SYMBOL", - "name": "identifier" - } - } - ] - } - }, - - "identifier": {"type": "PATTERN", "value": "[a-z]+"} - } -} diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/inverted_external_token/grammar.js tree-sitter-0.20.3/test/fixtures/test_grammars/inverted_external_token/grammar.js --- tree-sitter-0.20.1/test/fixtures/test_grammars/inverted_external_token/grammar.js 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/inverted_external_token/grammar.js 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,15 @@ +module.exports = grammar({ + name: "inverted_external_token", + + externals: $ => [$.line_break], + + extras: $ => [/\s/], + + rules: { + program: $ => repeat($.statement), + statement: $ => seq($._expression, $.line_break), + _expression: $ => choice($.identifier, $.member_expression), + member_expression: $ => prec.left(seq($._expression, ".", $.identifier)), + identifier: $ => /[a-z]+/, + }, +}); diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/inverted_external_token/grammar.json tree-sitter-0.20.3/test/fixtures/test_grammars/inverted_external_token/grammar.json --- tree-sitter-0.20.1/test/fixtures/test_grammars/inverted_external_token/grammar.json 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/inverted_external_token/grammar.json 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -{ - "name": "inverted_external_token", - - "externals": [ - {"type": "SYMBOL", "name": "line_break"} - ], - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "program": { - "type": "REPEAT", - "content": { - "type": "SYMBOL", - "name": "statement" - } - }, - - "statement": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "_expression"}, - {"type": "SYMBOL", "name": "line_break"} - ] - }, - - "_expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "member_expression"} - ] - }, - - "member_expression": { - "type": "PREC_LEFT", - "value": 0, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "_expression"}, - {"type": "STRING", "value": "."}, - {"type": "SYMBOL", "name": "identifier"} - ] - } - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-z]+" - } - } -} diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/invisible_start_rule/grammar.js tree-sitter-0.20.3/test/fixtures/test_grammars/invisible_start_rule/grammar.js --- tree-sitter-0.20.1/test/fixtures/test_grammars/invisible_start_rule/grammar.js 1970-01-01 00:00:00.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/invisible_start_rule/grammar.js 2022-01-22 00:36:45.000000000 +0000 @@ -0,0 +1,8 @@ +module.exports = grammar({ + name: "invisible_start_rule", + rules: { + _value: $ => choice($.a, $.b), + a: $ => "a", + b: $ => "b", + }, +}); diff -Nru tree-sitter-0.20.1/test/fixtures/test_grammars/invisible_start_rule/grammar.json tree-sitter-0.20.3/test/fixtures/test_grammars/invisible_start_rule/grammar.json --- tree-sitter-0.20.1/test/fixtures/test_grammars/invisible_start_rule/grammar.json 2021-11-21 20:33:27.000000000 +0000 +++ tree-sitter-0.20.3/test/fixtures/test_grammars/invisible_start_rule/grammar.json 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -{ - "name": "invisible_start_rule", - - "rules": { - "_value": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "a"}, - {"type": "SYMBOL", "name": "b"} - ] - }, - - "a": { - "type": "STRING", - "value": "a" - }, - - "b": { - "type": "STRING", - "value": "b" - } - } -} \ No newline at end of file