diff -Nru rust-tree-sitter-0.17.1/binding_rust/allocations.rs rust-tree-sitter-0.20.1/binding_rust/allocations.rs --- rust-tree-sitter-0.17.1/binding_rust/allocations.rs 1970-01-01 00:00:00.000000000 +0000 +++ rust-tree-sitter-0.20.1/binding_rust/allocations.rs 1973-11-29 21:33:09.000000000 +0000 @@ -0,0 +1,120 @@ +use spin::Mutex; +use std::{ + collections::HashMap, + os::raw::{c_ulong, c_void}, +}; + +#[derive(Debug, PartialEq, Eq, Hash)] +struct Allocation(*const c_void); +unsafe impl Send for Allocation {} +unsafe impl Sync for Allocation {} + +#[derive(Default)] +struct AllocationRecorder { + enabled: bool, + allocation_count: u64, + outstanding_allocations: HashMap, +} + +thread_local! { + static RECORDER: Mutex = Default::default(); +} + +extern "C" { + fn malloc(size: c_ulong) -> *mut c_void; + fn calloc(count: c_ulong, size: c_ulong) -> *mut c_void; + fn realloc(ptr: *mut c_void, size: c_ulong) -> *mut c_void; + fn free(ptr: *mut c_void); +} + +pub fn record(f: impl FnOnce() -> T) -> T { + RECORDER.with(|recorder| { + let mut recorder = recorder.lock(); + recorder.enabled = true; + recorder.allocation_count = 0; + recorder.outstanding_allocations.clear(); + }); + + let value = f(); + + let outstanding_allocation_indices = RECORDER.with(|recorder| { + let mut recorder = recorder.lock(); + recorder.enabled = false; + recorder.allocation_count = 0; + recorder + .outstanding_allocations + .drain() + .map(|e| e.1) + .collect::>() + }); + if !outstanding_allocation_indices.is_empty() { + panic!( + "Leaked allocation indices: {:?}", + outstanding_allocation_indices + ); + } + value +} + +fn record_alloc(ptr: *mut c_void) { + RECORDER.with(|recorder| { + let mut recorder = recorder.lock(); + if recorder.enabled { + let count = recorder.allocation_count; + recorder.allocation_count += 1; + recorder + .outstanding_allocations + .insert(Allocation(ptr), count); + } + }); +} + +fn record_dealloc(ptr: *mut c_void) { + RECORDER.with(|recorder| { + let mut recorder = recorder.lock(); + if recorder.enabled { + recorder.outstanding_allocations.remove(&Allocation(ptr)); + } + }); +} + +#[no_mangle] +pub extern "C" fn ts_record_malloc(size: c_ulong) -> *const c_void { + let result = unsafe { malloc(size) }; + record_alloc(result); + result +} + +#[no_mangle] +pub extern "C" fn ts_record_calloc(count: c_ulong, size: c_ulong) -> *const c_void { + let result = unsafe { calloc(count, size) }; + record_alloc(result); + result +} + +#[no_mangle] +pub extern "C" fn ts_record_realloc(ptr: *mut c_void, size: c_ulong) -> *const c_void { + record_dealloc(ptr); + let result = unsafe { realloc(ptr, size) }; + record_alloc(result); + result +} + +// This needs to be unsafe because it's reexported as crate::util::free_ptr, which is mapped to +// libc's `free` function when the allocation-tracking feature is disabled. Since `free` is +// unsafe, this function needs to be too. +#[no_mangle] +pub unsafe extern "C" fn ts_record_free(ptr: *mut c_void) { + record_dealloc(ptr); + free(ptr); +} + +#[no_mangle] +pub extern "C" fn ts_toggle_allocation_recording(enabled: bool) -> bool { + RECORDER.with(|recorder| { + let mut recorder = recorder.lock(); + let was_enabled = recorder.enabled; + recorder.enabled = enabled; + was_enabled + }) +} diff -Nru rust-tree-sitter-0.17.1/binding_rust/bindings.rs rust-tree-sitter-0.20.1/binding_rust/bindings.rs --- rust-tree-sitter-0.17.1/binding_rust/bindings.rs 2020-10-05 19:52:07.000000000 +0000 +++ rust-tree-sitter-0.20.1/binding_rust/bindings.rs 1973-11-29 21:33:09.000000000 +0000 @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen */ +/* automatically generated by rust-bindgen 0.59.1 */ pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; @@ -133,6 +133,7 @@ pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; +pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6; pub type TSQueryError = u32; extern "C" { #[doc = " Create a new parser."] @@ -148,13 +149,13 @@ #[doc = " Returns a boolean indicating whether or not the language was successfully"] #[doc = " assigned. True means assignment succeeded. False means there was a version"] #[doc = " mismatch: the language was generated with an incompatible version of the"] - #[doc = " Tree-sitter CLI. Check the language\'s version using `ts_language_version`"] - #[doc = " and compare it to this library\'s `TREE_SITTER_LANGUAGE_VERSION` and"] + #[doc = " Tree-sitter CLI. Check the language's version using `ts_language_version`"] + #[doc = " and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and"] #[doc = " `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; } extern "C" { - #[doc = " Get the parser\'s current language."] + #[doc = " Get the parser's current language."] pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { @@ -167,14 +168,12 @@ #[doc = ""] #[doc = " The second and third parameters specify the location and length of an array"] #[doc = " of ranges. The parser does *not* take ownership of these ranges; it copies"] - #[doc = " the data, so it doesn\'t matter how these ranges are allocated."] + #[doc = " the data, so it doesn't matter how these ranges are allocated."] #[doc = ""] #[doc = " If `length` is zero, then the entire document will be parsed. Otherwise,"] #[doc = " the given ranges must be ordered from earliest to latest in the document,"] #[doc = " and they must not overlap. That is, the following must hold for all"] - #[doc = " `i` < `length - 1`:"] - #[doc = ""] - #[doc = " ranges[i].end_byte <= ranges[i + 1].start_byte"] + #[doc = " `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte"] #[doc = ""] #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"] #[doc = " will not be assigned, and this function will return `false`. On success,"] @@ -208,8 +207,8 @@ #[doc = " following three fields:"] #[doc = " 1. `read`: A function to retrieve a chunk of text at a given byte offset"] #[doc = " and (row, column) position. The function should return a pointer to the"] - #[doc = " text and write its length to the the `bytes_read` pointer. The parser"] - #[doc = " does not take ownership of this buffer; it just borrows it until it has"] + #[doc = " text and write its length to the `bytes_read` pointer. The parser does"] + #[doc = " not take ownership of this buffer; it just borrows it until it has"] #[doc = " finished reading it. The function should write a zero value to the"] #[doc = " `bytes_read` pointer to indicate the end of the document."] #[doc = " 2. `payload`: An arbitrary pointer that will be passed to each invocation"] @@ -266,7 +265,7 @@ #[doc = ""] #[doc = " If the parser previously failed because of a timeout or a cancellation, then"] #[doc = " by default, it will resume where it left off on the next call to"] - #[doc = " `ts_parser_parse` or other parsing functions. If you don\'t want to resume,"] + #[doc = " `ts_parser_parse` or other parsing functions. If you don't want to resume,"] #[doc = " and instead intend to use this parser to parse some other document, you must"] #[doc = " call `ts_parser_reset` first."] pub fn ts_parser_reset(self_: *mut TSParser); @@ -284,7 +283,7 @@ pub fn ts_parser_timeout_micros(self_: *const TSParser) -> u64; } extern "C" { - #[doc = " Set the parser\'s current cancellation flag pointer."] + #[doc = " Set the parser's current cancellation flag pointer."] #[doc = ""] #[doc = " If a non-null pointer is assigned, then the parser will periodically read"] #[doc = " from this pointer during parsing. If it reads a non-zero value, it will"] @@ -292,7 +291,7 @@ pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize); } extern "C" { - #[doc = " Get the parser\'s current cancellation flag pointer."] + #[doc = " Get the parser's current cancellation flag pointer."] pub fn ts_parser_cancellation_flag(self_: *const TSParser) -> *const usize; } extern "C" { @@ -304,7 +303,7 @@ pub fn ts_parser_set_logger(self_: *mut TSParser, logger: TSLogger); } extern "C" { - #[doc = " Get the parser\'s current logger."] + #[doc = " Get the parser's current logger."] pub fn ts_parser_logger(self_: *const TSParser) -> TSLogger; } extern "C" { @@ -346,7 +345,7 @@ #[doc = " document, returning an array of ranges whose syntactic structure has changed."] #[doc = ""] #[doc = " For this to work correctly, the old syntax tree must have been edited such"] - #[doc = " that its ranges match up to the new tree. Generally, you\'ll want to call"] + #[doc = " that its ranges match up to the new tree. Generally, you'll want to call"] #[doc = " this function right after calling one of the `ts_parser_parse` functions."] #[doc = " You need to pass the old tree that was passed to parse, as well as the new"] #[doc = " tree that was returned from that function."] @@ -365,27 +364,27 @@ pub fn ts_tree_print_dot_graph(arg1: *const TSTree, arg2: *mut FILE); } extern "C" { - #[doc = " Get the node\'s type as a null-terminated string."] + #[doc = " Get the node's type as a null-terminated string."] pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the node\'s type as a numerical id."] + #[doc = " Get the node's type as a numerical id."] pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; } extern "C" { - #[doc = " Get the node\'s start byte."] + #[doc = " Get the node's start byte."] pub fn ts_node_start_byte(arg1: TSNode) -> u32; } extern "C" { - #[doc = " Get the node\'s start position in terms of rows and columns."] + #[doc = " Get the node's start position in terms of rows and columns."] pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; } extern "C" { - #[doc = " Get the node\'s end byte."] + #[doc = " Get the node's end byte."] pub fn ts_node_end_byte(arg1: TSNode) -> u32; } extern "C" { - #[doc = " Get the node\'s end position in terms of rows and columns."] + #[doc = " Get the node's end position in terms of rows and columns."] pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; } extern "C" { @@ -426,32 +425,37 @@ pub fn ts_node_has_error(arg1: TSNode) -> bool; } extern "C" { - #[doc = " Get the node\'s immediate parent."] + #[doc = " Get the node's immediate parent."] pub fn ts_node_parent(arg1: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the node\'s child at the given index, where zero represents the first"] + #[doc = " Get the node's child at the given index, where zero represents the first"] #[doc = " child."] pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[doc = " Get the node\'s number of children."] + #[doc = " Get the field name for node's child at the given index, where zero represents"] + #[doc = " the first child. Returns NULL, if no field is found."] + pub fn ts_node_field_name_for_child(arg1: TSNode, arg2: u32) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[doc = " Get the node's number of children."] pub fn ts_node_child_count(arg1: TSNode) -> u32; } extern "C" { - #[doc = " Get the node\'s *named* child at the given index."] + #[doc = " Get the node's *named* child at the given index."] #[doc = ""] #[doc = " See also `ts_node_is_named`."] pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[doc = " Get the node\'s number of *named* children."] + #[doc = " Get the node's number of *named* children."] #[doc = ""] #[doc = " See also `ts_node_is_named`."] pub fn ts_node_named_child_count(arg1: TSNode) -> u32; } extern "C" { - #[doc = " Get the node\'s child with the given field name."] + #[doc = " Get the node's child with the given field name."] pub fn ts_node_child_by_field_name( self_: TSNode, field_name: *const ::std::os::raw::c_char, @@ -459,32 +463,32 @@ ) -> TSNode; } extern "C" { - #[doc = " Get the node\'s child with the given numerical field id."] + #[doc = " Get the node's child with the given numerical field id."] #[doc = ""] #[doc = " You can convert a field name to an id using the"] #[doc = " `ts_language_field_id_for_name` function."] pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; } extern "C" { - #[doc = " Get the node\'s next / previous sibling."] + #[doc = " Get the node's next / previous sibling."] pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; } extern "C" { pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the node\'s next / previous *named* sibling."] + #[doc = " Get the node's next / previous *named* sibling."] pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the node\'s first child that extends beyond the given byte offset."] + #[doc = " Get the node's first child that extends beyond the given byte offset."] pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - #[doc = " Get the node\'s first named child that extends beyond the given byte offset."] + #[doc = " Get the node's first named child that extends beyond the given byte offset."] pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { @@ -539,22 +543,22 @@ pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode); } extern "C" { - #[doc = " Get the tree cursor\'s current node."] + #[doc = " Get the tree cursor's current node."] pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } extern "C" { - #[doc = " Get the field name of the tree cursor\'s current node."] + #[doc = " Get the field name of the tree cursor's current node."] #[doc = ""] - #[doc = " This returns `NULL` if the current node doesn\'t have a field."] + #[doc = " This returns `NULL` if the current node doesn't have a field."] #[doc = " See also `ts_node_child_by_field_name`."] pub fn ts_tree_cursor_current_field_name( arg1: *const TSTreeCursor, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the field name of the tree cursor\'s current node."] + #[doc = " Get the field id of the tree cursor's current node."] #[doc = ""] - #[doc = " This returns zero if the current node doesn\'t have a field."] + #[doc = " This returns zero if the current node doesn't have a field."] #[doc = " See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; } @@ -581,13 +585,17 @@ } extern "C" { #[doc = " Move the cursor to the first child of its current node that extends beyond"] - #[doc = " the given byte offset."] + #[doc = " the given byte offset or point."] #[doc = ""] #[doc = " This returns the index of the child node if one was found, and returns -1"] #[doc = " if no such child was found."] pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; } extern "C" { + pub fn ts_tree_cursor_goto_first_child_for_point(arg1: *mut TSTreeCursor, arg2: TSPoint) + -> i64; +} +extern "C" { pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; } extern "C" { @@ -623,7 +631,7 @@ pub fn ts_query_string_count(arg1: *const TSQuery) -> u32; } extern "C" { - #[doc = " Get the byte offset where the given pattern starts in the query\'s source."] + #[doc = " Get the byte offset where the given pattern starts in the query's source."] #[doc = ""] #[doc = " This can be useful when combining queries by concatenating their source"] #[doc = " code strings."] @@ -651,12 +659,12 @@ ) -> *const TSQueryPredicateStep; } extern "C" { - pub fn ts_query_step_is_definite(self_: *const TSQuery, byte_offset: u32) -> bool; + pub fn ts_query_is_pattern_guaranteed_at_step(self_: *const TSQuery, byte_offset: u32) -> bool; } extern "C" { - #[doc = " Get the name and length of one of the query\'s captures, or one of the"] - #[doc = " query\'s string literals. Each capture and string is associated with a"] - #[doc = " numeric id based on the order that it appeared in the query\'s source."] + #[doc = " Get the name and length of one of the query's captures, or one of the"] + #[doc = " query's string literals. Each capture and string is associated with a"] + #[doc = " numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_name_for_id( arg1: *const TSQuery, id: u32, @@ -697,16 +705,16 @@ #[doc = " to start running a given query on a given syntax node. Then, there are"] #[doc = " two options for consuming the results of the query:"] #[doc = " 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"] - #[doc = " the *matches* in the order that they were found. Each match contains the"] + #[doc = " *matches* in the order that they were found. Each match contains the"] #[doc = " index of the pattern that matched, and an array of captures. Because"] #[doc = " multiple patterns can match the same set of nodes, one match may contain"] #[doc = " captures that appear *before* some of the captures from a previous match."] #[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"] #[doc = " individual *captures* in the order that they appear. This is useful if"] - #[doc = " don\'t care about which pattern matched, and just want a single ordered"] + #[doc = " don't care about which pattern matched, and just want a single ordered"] #[doc = " sequence of captures."] #[doc = ""] - #[doc = " If you don\'t care about consuming all of the results, you can stop calling"] + #[doc = " If you don't care about consuming all of the results, you can stop calling"] #[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."] #[doc = " You can then start executing another query on another node by calling"] #[doc = " `ts_query_cursor_exec` again."] @@ -721,6 +729,24 @@ pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); } extern "C" { + #[doc = " Manage the maximum number of in-progress matches allowed by this query"] + #[doc = " cursor."] + #[doc = ""] + #[doc = " Query cursors have an optional maximum capacity for storing lists of"] + #[doc = " in-progress captures. If this capacity is exceeded, then the"] + #[doc = " earliest-starting match will silently be dropped to make room for further"] + #[doc = " matches. This maximum capacity is optional — by default, query cursors allow"] + #[doc = " any number of pending matches, dynamically allocating new space for them as"] + #[doc = " needed as the query is executed."] + pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; +} +extern "C" { + pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32; +} +extern "C" { + pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); +} +extern "C" { #[doc = " Set the range of bytes or (row, column) positions in which the query"] #[doc = " will be executed."] pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32); @@ -742,7 +768,7 @@ #[doc = " Advance to the next capture of the currently running query."] #[doc = ""] #[doc = " If there is a capture, write its match to `*match` and its index within"] - #[doc = " the matche\'s capture list to `*capture_index`. Otherwise, return `false`."] + #[doc = " the matche's capture list to `*capture_index`. Otherwise, return `false`."] pub fn ts_query_cursor_next_capture( arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch, @@ -804,5 +830,5 @@ pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 12; -pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 13; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff -Nru rust-tree-sitter-0.17.1/binding_rust/build.rs rust-tree-sitter-0.20.1/binding_rust/build.rs --- rust-tree-sitter-0.17.1/binding_rust/build.rs 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/binding_rust/build.rs 1973-11-29 21:33:09.000000000 +0000 @@ -21,9 +21,9 @@ let mut config = cc::Build::new(); - println!("cargo:rerun-if-env-changed=PROFILE"); - if env::var("PROFILE").map_or(false, |s| s == "debug") { - config.define("TREE_SITTER_TEST", ""); + println!("cargo:rerun-if-env-changed=CARGO_FEATURE_ALLOCATION_TRACKING"); + if env::var("CARGO_FEATURE_ALLOCATION_TRACKING").is_ok() { + config.define("TREE_SITTER_ALLOCATION_TRACKING", ""); } let src_path = Path::new("src"); diff -Nru rust-tree-sitter-0.17.1/binding_rust/lib.rs rust-tree-sitter-0.20.1/binding_rust/lib.rs --- rust-tree-sitter-0.17.1/binding_rust/lib.rs 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/binding_rust/lib.rs 1973-11-29 21:33:09.000000000 +0000 @@ -1,16 +1,25 @@ mod ffi; mod util; +#[cfg(feature = "allocation-tracking")] +pub mod allocations; + #[cfg(unix)] use std::os::unix::io::AsRawFd; -use std::ffi::CStr; -use std::marker::PhantomData; -use std::mem::MaybeUninit; -use std::os::raw::{c_char, c_void}; -use std::ptr::NonNull; -use std::sync::atomic::AtomicUsize; -use std::{char, fmt, hash, iter, ptr, slice, str, u16}; +use std::{ + char, error, + ffi::CStr, + fmt, hash, iter, + marker::PhantomData, + mem::MaybeUninit, + ops, + os::raw::{c_char, c_void}, + ptr::{self, NonNull}, + slice, str, + sync::atomic::AtomicUsize, + u16, +}; /// The latest ABI version that is supported by the current version of the /// library. @@ -99,7 +108,9 @@ } /// A stateful object for executing a `Query` on a syntax `Tree`. -pub struct QueryCursor(NonNull); +pub struct QueryCursor { + ptr: NonNull, +} /// A key-value pair associated with a particular pattern in a `Query`. #[derive(Debug, PartialEq, Eq)] @@ -123,18 +134,36 @@ } /// A match of a `Query` to a particular set of `Node`s. -pub struct QueryMatch<'a> { +pub struct QueryMatch<'cursor, 'tree> { pub pattern_index: usize, - pub captures: &'a [QueryCapture<'a>], + pub captures: &'cursor [QueryCapture<'tree>], id: u32, cursor: *mut ffi::TSQueryCursor, } -/// A sequence of `QueryCapture`s within a `QueryMatch`. -pub struct QueryCaptures<'a, T: AsRef<[u8]>> { +/// A sequence of `QueryMatch`es associated with a given `QueryCursor`. +pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<'a>> { + ptr: *mut ffi::TSQueryCursor, + query: &'a Query, + text_provider: T, + buffer1: Vec, + buffer2: Vec, + _tree: PhantomData<&'tree ()>, +} + +/// A sequence of `QueryCapture`s associated with a given `QueryCursor`. +pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<'a>> { ptr: *mut ffi::TSQueryCursor, query: &'a Query, - text_callback: Box) -> T + 'a>, + text_provider: T, + buffer1: Vec, + buffer2: Vec, + _tree: PhantomData<&'tree ()>, +} + +pub trait TextProvider<'a> { + type I: Iterator + 'a; + fn text(&mut self, node: Node) -> Self::I; } /// A particular `Node` that has been captured with a particular name within a `Query`. @@ -173,6 +202,7 @@ Capture, Predicate, Structure, + Language, } #[derive(Debug)] @@ -268,16 +298,6 @@ } } -impl fmt::Display for LanguageError { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!( - f, - "Incompatible language version {}. Expected minimum {}, maximum {}", - self.version, MIN_COMPATIBLE_LANGUAGE_VERSION, LANGUAGE_VERSION, - ) - } -} - impl Parser { /// Create a new parser. pub fn new() -> Parser { @@ -348,7 +368,7 @@ }; callback(log_type, message); } - }; + } let raw_container = Box::into_raw(container); @@ -459,7 +479,7 @@ let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32; return slice.as_ptr() as *const c_char; - }; + } let c_input = ffi::TSInput { payload: &mut payload as *mut (&mut F, Option) as *mut c_void, @@ -515,7 +535,7 @@ let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32 * 2; slice.as_ptr() as *const c_char - }; + } let c_input = ffi::TSInput { payload: &mut payload as *mut (&mut F, Option) as *mut c_void, @@ -570,7 +590,8 @@ /// ```text /// ranges[i].end_byte <= ranges[i + 1].start_byte /// ``` - /// If this requirement is not satisfied, method will panic. + /// If this requirement is not satisfied, method will return IncludedRangesError + /// error with an offset in the passed ranges slice pointing to a first incorrect range. pub fn set_included_ranges<'a>( &mut self, ranges: &'a [Range], @@ -609,7 +630,7 @@ /// If a pointer is assigned, then the parser will periodically read from /// this pointer during parsing. If it reads a non-zero value, it will halt early, /// returning `None`. See [parse](Parser::parse) for more information. - pub unsafe fn set_cancellation_flag(&self, flag: Option<&AtomicUsize>) { + pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { ffi::ts_parser_set_cancellation_flag( self.0.as_ptr(), @@ -865,6 +886,18 @@ Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) }) } + /// Get the field name of this node's child at the given index. + pub fn field_name_for_child(&self, child_index: u32) -> Option<&'static str> { + unsafe { + let ptr = ffi::ts_node_field_name_for_child(self.0, child_index); + if ptr.is_null() { + None + } else { + Some(CStr::from_ptr(ptr).to_str().unwrap()) + } + } + } + /// Iterate over this node's children. /// /// A [TreeCursor] is used to retrieve the children efficiently. Obtain @@ -1141,12 +1174,33 @@ } } + /// Move this cursor to the first child of its current node that extends beyond + /// the given byte offset. + /// + /// This returns the index of the child node if one was found, and returns `None` + /// if no such child was found. + pub fn goto_first_child_for_point(&mut self, point: Point) -> Option { + let result = + unsafe { ffi::ts_tree_cursor_goto_first_child_for_point(&mut self.0, point.into()) }; + if result < 0 { + None + } else { + Some(result as usize) + } + } + /// Re-initialize this tree cursor to start at a different node. pub fn reset(&mut self, node: Node<'a>) { unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; } } +impl<'a> Clone for TreeCursor<'a> { + fn clone(&self) -> Self { + TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) + } +} + impl<'a> Drop for TreeCursor<'a> { fn drop(&mut self) { unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } @@ -1178,6 +1232,19 @@ // On failure, build an error based on the error code and offset. if ptr.is_null() { + if error_type == ffi::TSQueryError_TSQueryErrorLanguage { + return Err(QueryError { + row: 0, + column: 0, + offset: 0, + message: LanguageError { + version: language.version(), + } + .to_string(), + kind: QueryErrorKind::Language, + }); + } + let offset = error_offset as usize; let mut line_start = 0; let mut row = 0; @@ -1460,6 +1527,14 @@ &self.capture_names } + /// Get the index for a given capture name. + pub fn capture_index_for_name(&self, name: &str) -> Option { + self.capture_names + .iter() + .position(|n| n == name) + .map(|ix| ix as u32) + } + /// Get the properties that are checked for the given pattern index. /// /// This includes predicates with the operators `is?` and `is-not?`. @@ -1478,7 +1553,7 @@ /// /// This includes predicate with operators other than: /// * `match?` - /// * `eq?` and `not-eq? + /// * `eq?` and `not-eq?` /// * `is?` and `is-not?` /// * `set!` pub fn general_predicates(&self, index: usize) -> &[QueryPredicate] { @@ -1511,8 +1586,10 @@ /// /// A query step is 'definite' if its parent pattern will be guaranteed to match /// successfully once it reaches the step. - pub fn step_is_definite(&self, byte_offset: usize) -> bool { - unsafe { ffi::ts_query_step_is_definite(self.ptr.as_ptr(), byte_offset as u32) } + pub fn is_pattern_guaranteed_at_step(&self, byte_offset: usize) -> bool { + unsafe { + ffi::ts_query_is_pattern_guaranteed_at_step(self.ptr.as_ptr(), byte_offset as u32) + } } fn parse_property( @@ -1583,7 +1660,28 @@ /// /// The cursor stores the state that is needed to iteratively search for matches. pub fn new() -> Self { - QueryCursor(unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }) + QueryCursor { + ptr: unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }, + } + } + + /// Return the maximum number of in-progress matches for this cursor. + pub fn match_limit(&self) -> u32 { + unsafe { ffi::ts_query_cursor_match_limit(self.ptr.as_ptr()) } + } + + /// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and + /// <= 65536. + pub fn set_match_limit(&mut self, limit: u32) { + unsafe { + ffi::ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit); + } + } + + /// Check if, on its last execution, this cursor exceeded its maximum number of + /// in-progress matches. + pub fn did_exceed_match_limit(&self) -> bool { + unsafe { ffi::ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) } } /// Iterate over all of the matches in the order that they were found. @@ -1591,70 +1689,93 @@ /// Each match contains the index of the pattern that matched, and a list of captures. /// Because multiple patterns can match the same set of nodes, one match may contain /// captures that appear *before* some of the captures from a previous match. - pub fn matches<'a, T: AsRef<[u8]>>( + pub fn matches<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( &'a mut self, query: &'a Query, - node: Node<'a>, - mut text_callback: impl FnMut(Node<'a>) -> T + 'a, - ) -> impl Iterator> + 'a { - let ptr = self.0.as_ptr(); + node: Node<'tree>, + text_provider: T, + ) -> QueryMatches<'a, 'tree, T> { + let ptr = self.ptr.as_ptr(); unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; - std::iter::from_fn(move || loop { - unsafe { - let mut m = MaybeUninit::::uninit(); - if ffi::ts_query_cursor_next_match(ptr, m.as_mut_ptr()) { - let result = QueryMatch::new(m.assume_init(), ptr); - if result.satisfies_text_predicates(query, &mut text_callback) { - return Some(result); - } - } else { - return None; - } - } - }) + QueryMatches { + ptr, + query, + text_provider, + buffer1: Default::default(), + buffer2: Default::default(), + _tree: PhantomData, + } } /// Iterate over all of the individual captures in the order that they appear. /// - /// This is useful if don't care about which pattern matched, and just want a single, + /// This is useful if you don't care about which pattern matched, and just want a single, /// ordered sequence of captures. - pub fn captures<'a, T: AsRef<[u8]>>( + pub fn captures<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( &'a mut self, query: &'a Query, - node: Node<'a>, - text_callback: impl FnMut(Node<'a>) -> T + 'a, - ) -> QueryCaptures<'a, T> { - let ptr = self.0.as_ptr(); - unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; + node: Node<'tree>, + text_provider: T, + ) -> QueryCaptures<'a, 'tree, T> { + let ptr = self.ptr.as_ptr(); + unsafe { ffi::ts_query_cursor_exec(self.ptr.as_ptr(), query.ptr.as_ptr(), node.0) }; QueryCaptures { ptr, query, - text_callback: Box::new(text_callback), + text_provider, + buffer1: Default::default(), + buffer2: Default::default(), + _tree: PhantomData, } } /// Set the range in which the query will be executed, in terms of byte offsets. - pub fn set_byte_range(&mut self, start: usize, end: usize) -> &mut Self { + pub fn set_byte_range(&mut self, range: ops::Range) -> &mut Self { unsafe { - ffi::ts_query_cursor_set_byte_range(self.0.as_ptr(), start as u32, end as u32); + ffi::ts_query_cursor_set_byte_range( + self.ptr.as_ptr(), + range.start as u32, + range.end as u32, + ); } self } /// Set the range in which the query will be executed, in terms of rows and columns. - pub fn set_point_range(&mut self, start: Point, end: Point) -> &mut Self { + pub fn set_point_range(&mut self, range: ops::Range) -> &mut Self { unsafe { - ffi::ts_query_cursor_set_point_range(self.0.as_ptr(), start.into(), end.into()); + ffi::ts_query_cursor_set_point_range( + self.ptr.as_ptr(), + range.start.into(), + range.end.into(), + ); } self } } -impl<'a> QueryMatch<'a> { +impl<'a, 'tree> QueryMatch<'a, 'tree> { + pub fn id(&self) -> u32 { + self.id + } + pub fn remove(self) { unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) } } + pub fn nodes_for_capture_index( + &self, + capture_ix: u32, + ) -> impl Iterator> + '_ { + self.captures.iter().filter_map(move |capture| { + if capture.index == capture_ix { + Some(capture.node) + } else { + None + } + }) + } + fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { QueryMatch { cursor, @@ -1663,7 +1784,7 @@ captures: if m.capture_count > 0 { unsafe { slice::from_raw_parts( - m.captures as *const QueryCapture<'a>, + m.captures as *const QueryCapture<'tree>, m.capture_count as usize, ) } @@ -1673,38 +1794,68 @@ } } - fn satisfies_text_predicates>( + fn satisfies_text_predicates( &self, query: &Query, - text_callback: &mut impl FnMut(Node<'a>) -> T, + buffer1: &mut Vec, + buffer2: &mut Vec, + text_provider: &mut impl TextProvider<'a>, ) -> bool { + fn get_text<'a, 'b: 'a, I: Iterator>( + buffer: &'a mut Vec, + mut chunks: I, + ) -> &'a [u8] { + let first_chunk = chunks.next().unwrap_or(&[]); + if let Some(next_chunk) = chunks.next() { + buffer.clear(); + buffer.extend_from_slice(first_chunk); + buffer.extend_from_slice(next_chunk); + for chunk in chunks { + buffer.extend_from_slice(chunk); + } + buffer.as_slice() + } else { + first_chunk + } + } + query.text_predicates[self.pattern_index] .iter() .all(|predicate| match predicate { TextPredicate::CaptureEqCapture(i, j, is_positive) => { - let node1 = self.capture_for_index(*i).unwrap(); - let node2 = self.capture_for_index(*j).unwrap(); - (text_callback(node1).as_ref() == text_callback(node2).as_ref()) == *is_positive + let node1 = self.nodes_for_capture_index(*i).next(); + let node2 = self.nodes_for_capture_index(*j).next(); + match (node1, node2) { + (Some(node1), Some(node2)) => { + let text1 = get_text(buffer1, text_provider.text(node1)); + let text2 = get_text(buffer2, text_provider.text(node2)); + (text1 == text2) == *is_positive + } + _ => true, + } } TextPredicate::CaptureEqString(i, s, is_positive) => { - let node = self.capture_for_index(*i).unwrap(); - (text_callback(node).as_ref() == s.as_bytes()) == *is_positive + let node = self.nodes_for_capture_index(*i).next(); + match node { + Some(node) => { + let text = get_text(buffer1, text_provider.text(node)); + (text == s.as_bytes()) == *is_positive + } + None => true, + } } TextPredicate::CaptureMatchString(i, r, is_positive) => { - let node = self.capture_for_index(*i).unwrap(); - r.is_match(text_callback(node).as_ref()) == *is_positive + let node = self.nodes_for_capture_index(*i).next(); + match node { + Some(node) => { + let text = get_text(buffer1, text_provider.text(node)); + r.is_match(text) == *is_positive + } + None => true, + } } }) } - - fn capture_for_index(&self, capture_index: u32) -> Option> { - for c in self.captures { - if c.index == capture_index { - return Some(c.node); - } - } - None - } } impl QueryProperty { @@ -1717,12 +1868,37 @@ } } -impl<'a, T: AsRef<[u8]>> Iterator for QueryCaptures<'a, T> { - type Item = (QueryMatch<'a>, usize); +impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> { + type Item = QueryMatch<'a, 'tree>; fn next(&mut self) -> Option { - loop { - unsafe { + unsafe { + loop { + let mut m = MaybeUninit::::uninit(); + if ffi::ts_query_cursor_next_match(self.ptr, m.as_mut_ptr()) { + let result = QueryMatch::new(m.assume_init(), self.ptr); + if result.satisfies_text_predicates( + self.query, + &mut self.buffer1, + &mut self.buffer2, + &mut self.text_provider, + ) { + return Some(result); + } + } else { + return None; + } + } + } + } +} + +impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { + type Item = (QueryMatch<'a, 'tree>, usize); + + fn next(&mut self) -> Option { + unsafe { + loop { let mut capture_index = 0u32; let mut m = MaybeUninit::::uninit(); if ffi::ts_query_cursor_next_capture( @@ -1731,7 +1907,12 @@ &mut capture_index as *mut u32, ) { let result = QueryMatch::new(m.assume_init(), self.ptr); - if result.satisfies_text_predicates(self.query, &mut self.text_callback) { + if result.satisfies_text_predicates( + self.query, + &mut self.buffer1, + &mut self.buffer2, + &mut self.text_provider, + ) { return Some((result, capture_index as usize)); } else { result.remove(); @@ -1744,7 +1925,35 @@ } } -impl<'a> fmt::Debug for QueryMatch<'a> { +impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> { + pub fn set_byte_range(&mut self, range: ops::Range) { + unsafe { + ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32); + } + } + + pub fn set_point_range(&mut self, range: ops::Range) { + unsafe { + ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into()); + } + } +} + +impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> { + pub fn set_byte_range(&mut self, range: ops::Range) { + unsafe { + ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32); + } + } + + pub fn set_point_range(&mut self, range: ops::Range) { + unsafe { + ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into()); + } + } +} + +impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -1754,6 +1963,26 @@ } } +impl<'a, F, I> TextProvider<'a> for F +where + F: FnMut(Node) -> I, + I: Iterator + 'a, +{ + type I = I; + + fn text(&mut self, node: Node) -> Self::I { + (self)(node) + } +} + +impl<'a> TextProvider<'a> for &'a [u8] { + type I = iter::Once<&'a [u8]>; + + fn text(&mut self, node: Node) -> Self::I { + iter::once(&self[node.byte_range()]) + } +} + impl PartialEq for Query { fn eq(&self, other: &Self) -> bool { self.ptr == other.ptr @@ -1768,7 +1997,7 @@ impl Drop for QueryCursor { fn drop(&mut self) { - unsafe { ffi::ts_query_cursor_delete(self.0.as_ptr()) } + unsafe { ffi::ts_query_cursor_delete(self.ptr.as_ptr()) } } } @@ -1893,10 +2122,59 @@ } } +impl fmt::Display for IncludedRangesError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Incorrect range by index: {}", self.0) + } +} + +impl fmt::Display for LanguageError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Incompatible language version {}. Expected minimum {}, maximum {}", + self.version, MIN_COMPATIBLE_LANGUAGE_VERSION, LANGUAGE_VERSION, + ) + } +} + +impl fmt::Display for QueryError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let msg = match self.kind { + QueryErrorKind::Field => "Invalid field name ", + QueryErrorKind::NodeType => "Invalid node type ", + QueryErrorKind::Capture => "Invalid capture name ", + QueryErrorKind::Predicate => "Invalid predicate: ", + QueryErrorKind::Structure => "Impossible pattern:\n", + QueryErrorKind::Syntax => "Invalid syntax:\n", + QueryErrorKind::Language => "", + }; + if msg.len() > 0 { + write!( + f, + "Query error at {}:{}. {}{}", + self.row + 1, + self.column + 1, + msg, + self.message + ) + } else { + write!(f, "{}", self.message) + } + } +} + +impl error::Error for IncludedRangesError {} +impl error::Error for LanguageError {} +impl error::Error for QueryError {} + unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} -unsafe impl Send for Tree {} unsafe impl Send for QueryCursor {} +unsafe impl Send for Tree {} unsafe impl Sync for Language {} +unsafe impl Sync for Parser {} unsafe impl Sync for Query {} +unsafe impl Sync for QueryCursor {} +unsafe impl Sync for Tree {} diff -Nru rust-tree-sitter-0.17.1/binding_rust/README.md rust-tree-sitter-0.20.1/binding_rust/README.md --- rust-tree-sitter-0.17.1/binding_rust/README.md 2020-05-12 23:08:26.000000000 +0000 +++ rust-tree-sitter-0.20.1/binding_rust/README.md 1973-11-29 21:33:09.000000000 +0000 @@ -83,7 +83,7 @@ ### Text Input -The source code to parse can be provided either either as a string, a slice, a vector, or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16: +The source code to parse can be provided either as a string, a slice, a vector, or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16: ```rust // Store some source code in an array of lines. diff -Nru rust-tree-sitter-0.17.1/binding_rust/util.rs rust-tree-sitter-0.20.1/binding_rust/util.rs --- rust-tree-sitter-0.17.1/binding_rust/util.rs 2020-10-14 18:58:04.000000000 +0000 +++ rust-tree-sitter-0.20.1/binding_rust/util.rs 1973-11-29 21:33:09.000000000 +0000 @@ -1,71 +1,20 @@ use std::os::raw::c_void; +#[cfg(not(feature = "allocation-tracking"))] extern "C" { - /// In *Release* builds, the C library links directly against `malloc` and `free`. - /// - /// When freeing memory that was allocated by C code, use `free` directly. - #[cfg(not(debug_assertions))] + /// Normally, use `free(1)` to free memory allocated from C. #[link_name = "free"] pub fn free_ptr(ptr: *mut c_void); - - /// In *Test* builds, the C library is compiled with the `TREE_SITTER_TEST` macro, - /// so all calls to `malloc`, `free`, etc are linked against wrapper functions - /// called `ts_record_malloc`, `ts_record_free`, etc. These symbols are defined - /// in the `tree_sitter_cli::tests::helpers::allocations` module. - /// - /// When freeing memory that was allocated by C code, use the `free` function - /// from that module. - #[cfg(debug_assertions)] - #[link_name = "ts_record_free"] - pub fn free_ptr(ptr: *mut c_void); - - /// In *Debug* builds, the C library is compiled the same as in test builds: using - /// the wrapper functions. This prevents the C library from having to be recompiled - /// constantly when switching between running tests and compiling with RLS. - /// - /// But we don't want to actually record allocations when running the library in - /// debug mode, so we define symbols like `ts_record_malloc` to just delegate to - /// the normal `malloc` functions. - #[cfg(all(debug_assertions, not(test)))] - fn malloc(size: usize) -> *mut c_void; - #[cfg(all(debug_assertions, not(test)))] - fn calloc(count: usize, size: usize) -> *mut c_void; - #[cfg(all(debug_assertions, not(test)))] - fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void; - #[cfg(all(debug_assertions, not(test)))] - fn free(ptr: *mut c_void); -} - -#[cfg(all(debug_assertions, not(test)))] -#[no_mangle] -unsafe extern "C" fn ts_record_malloc(size: usize) -> *const c_void { - malloc(size) -} - -#[cfg(all(debug_assertions, not(test)))] -#[no_mangle] -unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *const c_void { - calloc(count, size) -} - -#[cfg(all(debug_assertions, not(test)))] -#[no_mangle] -unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *const c_void { - realloc(ptr, size) } -#[cfg(all(debug_assertions, not(test)))] -#[no_mangle] -unsafe extern "C" fn ts_record_free(ptr: *mut c_void) { - free(ptr) -} - -#[cfg(all(debug_assertions, not(test)))] -#[no_mangle] -extern "C" fn ts_toggle_allocation_recording(_: bool) -> bool { - false -} +/// When the `allocation-tracking` feature is enabled, the C library is compiled with +/// the `TREE_SITTER_TEST` macro, so all calls to `malloc`, `free`, etc are linked +/// against wrapper functions called `ts_record_malloc`, `ts_record_free`, etc. +/// When freeing buffers allocated from C, use the wrapper `free` function. +#[cfg(feature = "allocation-tracking")] +pub use crate::allocations::ts_record_free as free_ptr; +/// A raw pointer and a length, exposed as an iterator. pub struct CBufferIter { ptr: *mut T, count: usize, diff -Nru rust-tree-sitter-0.17.1/Cargo.toml rust-tree-sitter-0.20.1/Cargo.toml --- rust-tree-sitter-0.17.1/Cargo.toml 2020-11-03 04:53:39.000000000 +0000 +++ rust-tree-sitter-0.20.1/Cargo.toml 1970-01-01 00:00:01.000000000 +0000 @@ -3,16 +3,16 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] +edition = "2018" name = "tree-sitter" -version = "0.17.1" +version = "0.20.1" authors = ["Max Brunsfeld "] build = "binding_rust/build.rs" include = ["/binding_rust/*", "/Cargo.toml", "/include/*", "/src/*.h", "/src/*.c", "/src/unicode/*"] @@ -25,7 +25,18 @@ [lib] path = "binding_rust/lib.rs" +[dependencies.lazy_static] +version = "1.2.0" +optional = true + [dependencies.regex] version = "1" + +[dependencies.spin] +version = "0.7" +optional = true [build-dependencies.cc] -version = "1.0" +version = "^1.0.58" + +[features] +allocation-tracking = ["lazy_static", "spin"] diff -Nru rust-tree-sitter-0.17.1/Cargo.toml.orig rust-tree-sitter-0.20.1/Cargo.toml.orig --- rust-tree-sitter-0.17.1/Cargo.toml.orig 2020-11-03 04:51:48.000000000 +0000 +++ rust-tree-sitter-0.20.1/Cargo.toml.orig 1973-11-29 21:33:09.000000000 +0000 @@ -1,8 +1,9 @@ [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" -version = "0.17.1" +version = "0.20.1" authors = ["Max Brunsfeld "] +edition = "2018" license = "MIT" readme = "binding_rust/README.md" keywords = ["incremental", "parsing"] @@ -21,10 +22,17 @@ ] [dependencies] +lazy_static = { version="1.2.0", optional=true } regex = "1" +spin = { version="0.7", optional=true } [build-dependencies] -cc = "1.0" +cc = "^1.0.58" [lib] path = "binding_rust/lib.rs" + +# This feature is only useful for testing the Tree-sitter library itself. +# It is exposed because all of Tree-sitter's tests live in the Tree-sitter CLI crate. +[features] +allocation-tracking = ["lazy_static", "spin"] diff -Nru rust-tree-sitter-0.17.1/.cargo_vcs_info.json rust-tree-sitter-0.20.1/.cargo_vcs_info.json --- rust-tree-sitter-0.17.1/.cargo_vcs_info.json 2020-11-03 04:53:39.000000000 +0000 +++ rust-tree-sitter-0.20.1/.cargo_vcs_info.json 1970-01-01 00:00:01.000000000 +0000 @@ -1,5 +1,5 @@ { "git": { - "sha1": "281e75d74d78b0cbb6441bf497fdef0988ab49e4" + "sha1": "062421dece3315bd6f228ad6d468cba083d0a2d5" } } diff -Nru rust-tree-sitter-0.17.1/debian/changelog rust-tree-sitter-0.20.1/debian/changelog --- rust-tree-sitter-0.17.1/debian/changelog 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/changelog 2021-12-22 15:35:39.000000000 +0000 @@ -1,3 +1,11 @@ +rust-tree-sitter (0.20.1-1) unstable; urgency=medium + + * Package tree-sitter 0.20.1 from crates.io using debcargo 2.5.0 + * Relax spin dep to 0.5 + * Add myself to Uploaders + + -- James McCoy Wed, 22 Dec 2021 10:35:39 -0500 + rust-tree-sitter (0.17.1-2) unstable; urgency=medium * Package tree-sitter 0.17.1 from crates.io using debcargo 2.4.3 diff -Nru rust-tree-sitter-0.17.1/debian/control rust-tree-sitter-0.20.1/debian/control --- rust-tree-sitter-0.17.1/debian/control 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/control 2021-12-22 15:35:39.000000000 +0000 @@ -2,18 +2,20 @@ Section: rust Priority: optional Build-Depends: debhelper (>= 12), - dh-cargo (>= 24), + dh-cargo (>= 25), cargo:native , rustc:native , libstd-rust-dev , - librust-cc-1+default-dev , + librust-cc-1+default-dev (>= 1.0.58-~~) , librust-regex-1+default-dev Maintainer: Debian Rust Maintainers Uploaders: - Sylvestre Ledru -Standards-Version: 4.5.0 + Sylvestre Ledru , + James McCoy +Standards-Version: 4.5.1 Vcs-Git: https://salsa.debian.org/rust-team/debcargo-conf.git [src/tree-sitter] Vcs-Browser: https://salsa.debian.org/rust-team/debcargo-conf/tree/master/src/tree-sitter +X-Cargo-Crate: tree-sitter Rules-Requires-Root: no Package: librust-tree-sitter-dev @@ -21,16 +23,30 @@ Multi-Arch: same Depends: ${misc:Depends}, - librust-cc-1+default-dev, - librust-regex-1+default-dev + librust-cc-1+default-dev (>= 1.0.58-~~), + librust-lazy-static-1+default-dev (>= 1.2.0-~~), + librust-regex-1+default-dev, + librust-spin-0.5+default-dev Provides: + librust-tree-sitter+allocation-tracking-dev (= ${binary:Version}), librust-tree-sitter+default-dev (= ${binary:Version}), + librust-tree-sitter+lazy-static-dev (= ${binary:Version}), + librust-tree-sitter+spin-dev (= ${binary:Version}), librust-tree-sitter-0-dev (= ${binary:Version}), + librust-tree-sitter-0+allocation-tracking-dev (= ${binary:Version}), librust-tree-sitter-0+default-dev (= ${binary:Version}), - librust-tree-sitter-0.17-dev (= ${binary:Version}), - librust-tree-sitter-0.17+default-dev (= ${binary:Version}), - librust-tree-sitter-0.17.1-dev (= ${binary:Version}), - librust-tree-sitter-0.17.1+default-dev (= ${binary:Version}) + librust-tree-sitter-0+lazy-static-dev (= ${binary:Version}), + librust-tree-sitter-0+spin-dev (= ${binary:Version}), + librust-tree-sitter-0.20-dev (= ${binary:Version}), + librust-tree-sitter-0.20+allocation-tracking-dev (= ${binary:Version}), + librust-tree-sitter-0.20+default-dev (= ${binary:Version}), + librust-tree-sitter-0.20+lazy-static-dev (= ${binary:Version}), + librust-tree-sitter-0.20+spin-dev (= ${binary:Version}), + librust-tree-sitter-0.20.1-dev (= ${binary:Version}), + librust-tree-sitter-0.20.1+allocation-tracking-dev (= ${binary:Version}), + librust-tree-sitter-0.20.1+default-dev (= ${binary:Version}), + librust-tree-sitter-0.20.1+lazy-static-dev (= ${binary:Version}), + librust-tree-sitter-0.20.1+spin-dev (= ${binary:Version}) Description: Rust bindings to the Tree-sitter parsing library - Rust source code This package contains the source for the Rust tree-sitter crate, packaged by debcargo for use with cargo and dh-cargo. diff -Nru rust-tree-sitter-0.17.1/debian/copyright rust-tree-sitter-0.20.1/debian/copyright --- rust-tree-sitter-0.17.1/debian/copyright 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/copyright 2021-12-22 15:35:39.000000000 +0000 @@ -4,10 +4,10 @@ Source: https://github.com/tree-sitter/tree-sitter Files: * -Copyright: 2018-2020 Max Brunsfeld +Copyright: 2018-2021 Max Brunsfeld License: MIT -Files: ./src/unicode/* +Files: src/unicode/* Copyright: 1991-2019 Unicode, Inc. All rights reserved. 1995-2016 International Business Machines Corporation and others @@ -26,8 +26,9 @@ Files: debian/* Copyright: - 2020 Debian Rust Maintainers - 2020 Sylvestre Ledru + 2020-2021 Debian Rust Maintainers + 2020-2021 Sylvestre Ledru + 2021 James McCoy License: MIT License: MIT diff -Nru rust-tree-sitter-0.17.1/debian/copyright.debcargo.hint rust-tree-sitter-0.20.1/debian/copyright.debcargo.hint --- rust-tree-sitter-0.17.1/debian/copyright.debcargo.hint 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/copyright.debcargo.hint 2021-12-22 15:35:39.000000000 +0000 @@ -61,8 +61,9 @@ Files: debian/* Copyright: - 2020 Debian Rust Maintainers - 2020 Sylvestre Ledru + 2020-2021 Debian Rust Maintainers + 2020-2021 Sylvestre Ledru + 2020-2021 James McCoy License: MIT License: MIT diff -Nru rust-tree-sitter-0.17.1/debian/debcargo.toml rust-tree-sitter-0.20.1/debian/debcargo.toml --- rust-tree-sitter-0.17.1/debian/debcargo.toml 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/debcargo.toml 2021-12-22 15:35:39.000000000 +0000 @@ -1,3 +1,4 @@ overlay = "." -uploaders = ["Sylvestre Ledru "] +uploaders = ["Sylvestre Ledru ", "James McCoy "] whitelist = ["**/*.c"] +collapse_features = true diff -Nru rust-tree-sitter-0.17.1/debian/patches/relax-deps.diff rust-tree-sitter-0.20.1/debian/patches/relax-deps.diff --- rust-tree-sitter-0.17.1/debian/patches/relax-deps.diff 1970-01-01 00:00:00.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/patches/relax-deps.diff 2021-12-22 15:35:39.000000000 +0000 @@ -0,0 +1,13 @@ +Index: tree-sitter/Cargo.toml +=================================================================== +--- tree-sitter.orig/Cargo.toml ++++ tree-sitter/Cargo.toml +@@ -34,7 +34,7 @@ optional = true + version = "1" + + [dependencies.spin] +-version = "0.7" ++version = "0.5" + optional = true + [build-dependencies.cc] + version = "^1.0.58" diff -Nru rust-tree-sitter-0.17.1/debian/patches/series rust-tree-sitter-0.20.1/debian/patches/series --- rust-tree-sitter-0.17.1/debian/patches/series 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/patches/series 2021-12-22 15:35:39.000000000 +0000 @@ -1 +1,2 @@ dep-build.diff +relax-deps.diff diff -Nru rust-tree-sitter-0.17.1/debian/tests/control rust-tree-sitter-0.20.1/debian/tests/control --- rust-tree-sitter-0.17.1/debian/tests/control 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/tests/control 2021-12-22 15:35:39.000000000 +0000 @@ -1,14 +1,29 @@ -Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.17.1 --all-targets --all-features +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.20.1 --all-targets --all-features Features: test-name=rust-tree-sitter:@ Depends: dh-cargo (>= 18), @ Restrictions: allow-stderr, skip-not-installable -Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.17.1 --all-targets +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.20.1 --all-targets --no-default-features --features allocation-tracking +Features: test-name=librust-tree-sitter-dev:allocation-tracking +Depends: dh-cargo (>= 18), @ +Restrictions: allow-stderr, skip-not-installable + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.20.1 --all-targets Features: test-name=librust-tree-sitter-dev:default Depends: dh-cargo (>= 18), @ Restrictions: allow-stderr, skip-not-installable -Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.17.1 --all-targets --no-default-features +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.20.1 --all-targets --no-default-features --features lazy_static +Features: test-name=librust-tree-sitter-dev:lazy_static +Depends: dh-cargo (>= 18), @ +Restrictions: allow-stderr, skip-not-installable + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.20.1 --all-targets --no-default-features --features spin +Features: test-name=librust-tree-sitter-dev:spin +Depends: dh-cargo (>= 18), @ +Restrictions: allow-stderr, skip-not-installable + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.20.1 --all-targets --no-default-features Features: test-name=librust-tree-sitter-dev: Depends: dh-cargo (>= 18), @ Restrictions: allow-stderr, skip-not-installable diff -Nru rust-tree-sitter-0.17.1/debian/watch rust-tree-sitter-0.20.1/debian/watch --- rust-tree-sitter-0.17.1/debian/watch 2020-11-24 17:14:44.000000000 +0000 +++ rust-tree-sitter-0.20.1/debian/watch 2021-12-22 15:35:39.000000000 +0000 @@ -1,2 +1,4 @@ version=4 -opts=filenamemangle=s/.*\/(.*)\/download/tree-sitter-$1\.tar\.gz/g,\\nuversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/ \\nhttps://qa.debian.org/cgi-bin/fakeupstream.cgi?upstream=crates.io/tree-sitter .*/crates/tree-sitter/@ANY_VERSION@/download +opts=filenamemangle=s/.*\/(.*)\/download/tree-sitter-$1\.tar\.gz/g,\ +uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/ \ +https://qa.debian.org/cgi-bin/fakeupstream.cgi?upstream=crates.io/tree-sitter .*/crates/tree-sitter/@ANY_VERSION@/download diff -Nru rust-tree-sitter-0.17.1/include/tree_sitter/api.h rust-tree-sitter-0.20.1/include/tree_sitter/api.h --- rust-tree-sitter-0.17.1/include/tree_sitter/api.h 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/include/tree_sitter/api.h 1973-11-29 21:33:09.000000000 +0000 @@ -21,13 +21,13 @@ * The Tree-sitter library is generally backwards-compatible with languages * generated using older CLI versions, but is not forwards-compatible. */ -#define TREE_SITTER_LANGUAGE_VERSION 12 +#define TREE_SITTER_LANGUAGE_VERSION 13 /** * The earliest ABI version that is supported by the current version of the * library. */ -#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9 +#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 /*******************/ /* Section - Types */ @@ -131,6 +131,7 @@ TSQueryErrorField, TSQueryErrorCapture, TSQueryErrorStructure, + TSQueryErrorLanguage, } TSQueryError; /********************/ @@ -179,9 +180,7 @@ * If `length` is zero, then the entire document will be parsed. Otherwise, * the given ranges must be ordered from earliest to latest in the document, * and they must not overlap. That is, the following must hold for all - * `i` < `length - 1`: - * - * ranges[i].end_byte <= ranges[i + 1].start_byte + * `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte * * If this requirement is not satisfied, the operation will fail, the ranges * will not be assigned, and this function will return `false`. On success, @@ -488,6 +487,12 @@ TSNode ts_node_child(TSNode, uint32_t); /** + * Get the field name for node's child at the given index, where zero represents + * the first child. Returns NULL, if no field is found. + */ +const char *ts_node_field_name_for_child(TSNode, uint32_t); + +/** * Get the node's number of children. */ uint32_t ts_node_child_count(TSNode); @@ -612,7 +617,7 @@ const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); /** - * Get the field name of the tree cursor's current node. + * Get the field id of the tree cursor's current node. * * This returns zero if the current node doesn't have a field. * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`. @@ -645,12 +650,13 @@ /** * Move the cursor to the first child of its current node that extends beyond - * the given byte offset. + * the given byte offset or point. * * This returns the index of the child node if one was found, and returns -1 * if no such child was found. */ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t); +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *, TSPoint); TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *); @@ -719,7 +725,7 @@ uint32_t *length ); -bool ts_query_step_is_definite( +bool ts_query_is_pattern_guaranteed_at_step( const TSQuery *self, uint32_t byte_offset ); @@ -792,6 +798,21 @@ void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); /** + * Manage the maximum number of in-progress matches allowed by this query + * cursor. + * + * Query cursors have an optional maximum capacity for storing lists of + * in-progress captures. If this capacity is exceeded, then the + * earliest-starting match will silently be dropped to make room for further + * matches. This maximum capacity is optional — by default, query cursors allow + * any number of pending matches, dynamically allocating new space for them as + * needed as the query is executed. + */ +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *); +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *); +void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t); + +/** * Set the range of bytes or (row, column) positions in which the query * will be executed. */ diff -Nru rust-tree-sitter-0.17.1/include/tree_sitter/parser.h rust-tree-sitter-0.20.1/include/tree_sitter/parser.h --- rust-tree-sitter-0.17.1/include/tree_sitter/parser.h 2020-09-23 19:35:49.000000000 +0000 +++ rust-tree-sitter-0.20.1/include/tree_sitter/parser.h 1973-11-29 21:33:09.000000000 +0000 @@ -13,6 +13,8 @@ #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 +typedef uint16_t TSStateId; + #ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; @@ -30,12 +32,10 @@ uint16_t length; } TSFieldMapSlice; -typedef uint16_t TSStateId; - typedef struct { - bool visible : 1; - bool named : 1; - bool supertype: 1; + bool visible; + bool named; + bool supertype; } TSSymbolMetadata; typedef struct TSLexer TSLexer; @@ -57,21 +57,21 @@ TSParseActionTypeRecover, } TSParseActionType; -typedef struct { - union { - struct { - TSStateId state; - bool extra : 1; - bool repetition : 1; - } shift; - struct { - TSSymbol symbol; - int16_t dynamic_precedence; - uint8_t child_count; - uint8_t production_id; - } reduce; - } params; - TSParseActionType type : 4; +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; } TSParseAction; typedef struct { @@ -83,7 +83,7 @@ TSParseAction action; struct { uint8_t count; - bool reusable : 1; + bool reusable; } entry; } TSParseActionEntry; @@ -93,13 +93,24 @@ uint32_t alias_count; uint32_t token_count; uint32_t external_token_count; - const char **symbol_names; - const TSSymbolMetadata *symbol_metadata; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; const TSParseActionEntry *parse_actions; - const TSLexMode *lex_modes; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; const TSSymbol *alias_sequences; - uint16_t max_alias_sequence_length; + const TSLexMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; @@ -112,16 +123,6 @@ unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; - uint32_t field_count; - const TSFieldMapSlice *field_map_slices; - const TSFieldMapEntry *field_map_entries; - const char **field_names; - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSSymbol *public_symbol_map; - const uint16_t *alias_map; - uint32_t state_count; }; /* @@ -170,66 +171,50 @@ #define ACTIONS(id) id -#define SHIFT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value \ - } \ - }, \ - .type = TSParseActionTypeShift \ - } \ - } +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} #define SHIFT_REPEAT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value, \ - .repetition = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ } \ - } - -#define RECOVER() \ - { \ - { .type = TSParseActionTypeRecover } \ - } + }} #define SHIFT_EXTRA() \ - { \ - { \ - .params = { \ - .shift = { \ - .extra = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ } \ - } + }} #define REDUCE(symbol_val, child_count_val, ...) \ - { \ - { \ - .params = { \ - .reduce = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ - }, \ - .type = TSParseActionTypeReduce \ - } \ - } - -#define ACCEPT_INPUT() \ - { \ - { .type = TSParseActionTypeAccept } \ - } + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} #ifdef __cplusplus } diff -Nru rust-tree-sitter-0.17.1/src/alloc.h rust-tree-sitter-0.20.1/src/alloc.h --- rust-tree-sitter-0.17.1/src/alloc.h 2020-11-02 20:30:55.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/alloc.h 1973-11-29 21:33:09.000000000 +0000 @@ -9,7 +9,7 @@ #include #include -#if defined(TREE_SITTER_TEST) +#if defined(TREE_SITTER_ALLOCATION_TRACKING) void *ts_record_malloc(size_t); void *ts_record_calloc(size_t, size_t); diff -Nru rust-tree-sitter-0.17.1/src/bits.h rust-tree-sitter-0.20.1/src/bits.h --- rust-tree-sitter-0.17.1/src/bits.h 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/bits.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -#ifndef TREE_SITTER_BITS_H_ -#define TREE_SITTER_BITS_H_ - -#include - -static inline uint32_t bitmask_for_index(uint16_t id) { - return (1u << (31 - id)); -} - -#ifdef __TINYC__ - -// Algorithm taken from the Hacker's Delight book -// See also https://graphics.stanford.edu/~seander/bithacks.html -static inline uint32_t count_leading_zeros(uint32_t x) { - int count = 0; - if (x == 0) return 32; - x = x - ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - count = (((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; - return count; -} - -#elif defined _WIN32 && !defined __GNUC__ - -#include - -static inline uint32_t count_leading_zeros(uint32_t x) { - if (x == 0) return 32; - uint32_t result; - _BitScanReverse(&result, x); - return 31 - result; -} - -#else - -static inline uint32_t count_leading_zeros(uint32_t x) { - if (x == 0) return 32; - return __builtin_clz(x); -} - -#endif -#endif // TREE_SITTER_BITS_H_ diff -Nru rust-tree-sitter-0.17.1/src/language.c rust-tree-sitter-0.20.1/src/language.c --- rust-tree-sitter-0.17.1/src/language.c 2020-09-23 19:35:49.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/language.c 1973-11-29 21:33:09.000000000 +0000 @@ -12,11 +12,7 @@ } uint32_t ts_language_field_count(const TSLanguage *self) { - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) { - return self->field_count; - } else { - return 0; - } + return self->field_count; } void ts_language_table_entry( @@ -57,11 +53,7 @@ TSSymbol symbol ) { if (symbol == ts_builtin_sym_error) return symbol; - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - return self->public_symbol_map[symbol]; - } else { - return symbol; - } + return self->public_symbol_map[symbol]; } const char *ts_language_symbol_name( @@ -92,11 +84,7 @@ if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; const char *symbol_name = self->symbol_names[i]; if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - return self->public_symbol_map[i]; - } else { - return i; - } + return self->public_symbol_map[i]; } } return 0; @@ -107,7 +95,7 @@ TSSymbol symbol ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); - if (metadata.named) { + if (metadata.named && metadata.visible) { return TSSymbolTypeRegular; } else if (metadata.visible) { return TSSymbolTypeAnonymous; diff -Nru rust-tree-sitter-0.17.1/src/language.h rust-tree-sitter-0.20.1/src/language.h --- rust-tree-sitter-0.17.1/src/language.h 2020-09-02 17:32:46.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/language.h 1973-11-29 21:33:09.000000000 +0000 @@ -9,11 +9,6 @@ #include "tree_sitter/parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) -#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT 12 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP 12 typedef struct { const TSParseAction *actions; @@ -59,16 +54,6 @@ return entry.actions; } -static inline bool ts_language_has_actions( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol -) { - TableEntry entry; - ts_language_table_entry(self, state, symbol, &entry); - return entry.action_count > 0; -} - static inline bool ts_language_has_reduce_action( const TSLanguage *self, TSStateId state, @@ -91,10 +76,7 @@ TSStateId state, TSSymbol symbol ) { - if ( - self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && - state >= self->large_state_count - ) { + if (state >= self->large_state_count) { uint32_t index = self->small_parse_table_map[state - self->large_state_count]; const uint16_t *data = &self->small_parse_table[index]; uint16_t group_count = *(data++); @@ -111,6 +93,14 @@ } } +static inline bool ts_language_has_actions( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { + return ts_language_lookup(self, state, symbol) != 0; +} + // Iterate over all of the symbols that are valid in the given state. // // For 'large' parse states, this just requires iterating through @@ -121,9 +111,7 @@ const TSLanguage *self, TSStateId state ) { - bool is_small_state = - self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && - state >= self->large_state_count; + bool is_small_state = state >= self->large_state_count; const uint16_t *data; const uint16_t *group_end = NULL; uint16_t group_count = 0; @@ -203,7 +191,7 @@ if (count > 0) { TSParseAction action = actions[count - 1]; if (action.type == TSParseActionTypeShift) { - return action.params.shift.extra ? state : action.params.shift.state; + return action.shift.extra ? state : action.shift.state; } } return 0; @@ -248,7 +236,7 @@ const TSFieldMapEntry **start, const TSFieldMapEntry **end ) { - if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS || self->field_count == 0) { + if (self->field_count == 0) { *start = NULL; *end = NULL; return; @@ -268,8 +256,6 @@ *start = &self->public_symbol_map[original_symbol]; *end = *start + 1; - if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP) return; - unsigned i = 0; for (;;) { TSSymbol symbol = self->alias_map[i++]; diff -Nru rust-tree-sitter-0.17.1/src/lexer.c rust-tree-sitter-0.20.1/src/lexer.c --- rust-tree-sitter-0.17.1/src/lexer.c 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/lexer.c 1973-11-29 21:33:09.000000000 +0000 @@ -102,6 +102,56 @@ } } +static void ts_lexer_goto(Lexer *self, Length position) { + self->current_position = position; + bool found_included_range = false; + + // Move to the first valid position at or after the given position. + for (unsigned i = 0; i < self->included_range_count; i++) { + TSRange *included_range = &self->included_ranges[i]; + if (included_range->end_byte > position.bytes) { + if (included_range->start_byte >= position.bytes) { + self->current_position = (Length) { + .bytes = included_range->start_byte, + .extent = included_range->start_point, + }; + } + + self->current_included_range_index = i; + found_included_range = true; + break; + } + } + + if (found_included_range) { + // If the current position is outside of the current chunk of text, + // then clear out the current chunk of text. + if (self->chunk && ( + position.bytes < self->chunk_start || + position.bytes >= self->chunk_start + self->chunk_size + )) { + ts_lexer__clear_chunk(self); + } + + self->lookahead_size = 0; + self->data.lookahead = '\0'; + } + + // If the given position is beyond any of included ranges, move to the EOF + // state - past the end of the included ranges. + else { + self->current_included_range_index = self->included_range_count; + TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; + self->current_position = (Length) { + .bytes = last_included_range->end_byte, + .extent = last_included_range->end_point, + }; + ts_lexer__clear_chunk(self); + self->lookahead_size = 1; + self->data.lookahead = '\0'; + } +} + // Advance to the next character in the source code, retrieving a new // chunk of source code if needed. static void ts_lexer__advance(TSLexer *_self, bool skip) { @@ -183,22 +233,8 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; - uint32_t goal_byte = self->current_position.bytes; - - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; - - if (self->current_position.bytes < self->chunk_start) { - ts_lexer__get_chunk(self); - } - - uint32_t result = 0; - while (self->current_position.bytes < goal_byte) { - ts_lexer__advance(&self->data, false); - result++; - } - - return result; + self->did_get_column = true; + return self->current_position.extent.column; } // Is the lexer at a boundary between two disjoint included ranges of @@ -247,56 +283,6 @@ ts_free(self->included_ranges); } -static void ts_lexer_goto(Lexer *self, Length position) { - self->current_position = position; - bool found_included_range = false; - - // Move to the first valid position at or after the given position. - for (unsigned i = 0; i < self->included_range_count; i++) { - TSRange *included_range = &self->included_ranges[i]; - if (included_range->end_byte > position.bytes) { - if (included_range->start_byte > position.bytes) { - self->current_position = (Length) { - .bytes = included_range->start_byte, - .extent = included_range->start_point, - }; - } - - self->current_included_range_index = i; - found_included_range = true; - break; - } - } - - if (found_included_range) { - // If the current position is outside of the current chunk of text, - // then clear out the current chunk of text. - if (self->chunk && ( - position.bytes < self->chunk_start || - position.bytes >= self->chunk_start + self->chunk_size - )) { - ts_lexer__clear_chunk(self); - } - - self->lookahead_size = 0; - self->data.lookahead = '\0'; - } - - // If the given position is beyond any of included ranges, move to the EOF - // state - past the end of the included ranges. - else { - self->current_included_range_index = self->included_range_count; - TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; - self->current_position = (Length) { - .bytes = last_included_range->end_byte, - .extent = last_included_range->end_point, - }; - ts_lexer__clear_chunk(self); - self->lookahead_size = 1; - self->data.lookahead = '\0'; - } -} - void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; ts_lexer__clear_chunk(self); @@ -315,6 +301,7 @@ self->token_start_position = self->current_position; self->token_end_position = LENGTH_UNDEFINED; self->data.result_symbol = 0; + self->did_get_column = false; if (!ts_lexer__eof(&self->data)) { if (!self->chunk_size) ts_lexer__get_chunk(self); if (!self->lookahead_size) ts_lexer__get_lookahead(self); diff -Nru rust-tree-sitter-0.17.1/src/lexer.h rust-tree-sitter-0.20.1/src/lexer.h --- rust-tree-sitter-0.17.1/src/lexer.h 2020-01-17 17:48:04.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/lexer.h 1973-11-29 21:33:09.000000000 +0000 @@ -17,16 +17,17 @@ Length token_end_position; TSRange *included_ranges; - size_t included_range_count; - size_t current_included_range_index; - const char *chunk; + TSInput input; + TSLogger logger; + + uint32_t included_range_count; + uint32_t current_included_range_index; uint32_t chunk_start; uint32_t chunk_size; uint32_t lookahead_size; + bool did_get_column; - TSInput input; - TSLogger logger; char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; } Lexer; diff -Nru rust-tree-sitter-0.17.1/src/node.c rust-tree-sitter-0.20.1/src/node.c --- rust-tree-sitter-0.17.1/src/node.c 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/node.c 1973-11-29 21:33:09.000000000 +0000 @@ -150,9 +150,6 @@ while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node__is_relevant(child, include_anonymous)) { if (index == child_index) { - if (ts_node__is_relevant(self, true)) { - ts_tree_set_cached_parent(self.tree, &child, &self); - } return child; } index++; @@ -355,7 +352,6 @@ node = child; if (ts_node__is_relevant(node, include_anonymous)) { - ts_tree_set_cached_parent(self.tree, &child, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -395,7 +391,6 @@ node = child; if (ts_node__is_relevant(node, include_anonymous)) { - ts_tree_set_cached_parent(self.tree, &child, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -464,10 +459,7 @@ } TSNode ts_node_parent(TSNode self) { - TSNode node = ts_tree_get_cached_parent(self.tree, &self); - if (node.id) return node; - - node = ts_tree_root_node(self.tree); + TSNode node = ts_tree_root_node(self.tree); uint32_t end_byte = ts_node_end_byte(self); if (node.id == self.id) return ts_node__null(); @@ -486,7 +478,6 @@ if (iterator.position.bytes >= end_byte) { node = child; if (ts_node__is_relevant(child, true)) { - ts_tree_set_cached_parent(self.tree, &node, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -561,17 +552,40 @@ return child; } - // If the field refers to a hidden node, return its first visible - // child. - else { + // If the field refers to a hidden node with visible children, + // return the first visible child. + else if (ts_node_child_count(child) > 0 ) { return ts_node_child(child, 0); } + + // Otherwise, continue searching subsequent children. + else { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } } } return ts_node__null(); } +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { + const TSFieldMapEntry *field_map_start = NULL, *field_map_end = NULL; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map_start, + &field_map_end + ); + + for (const TSFieldMapEntry *i = field_map_start; i < field_map_end; i++) { + if (i->child_index == child_index) { + return self.tree->language->field_names[i->field_id]; + } + } + return NULL; +} + TSNode ts_node_child_by_field_name( TSNode self, const char *name, diff -Nru rust-tree-sitter-0.17.1/src/parser.c rust-tree-sitter-0.20.1/src/parser.c --- rust-tree-sitter-0.17.1/src/parser.c 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/parser.c 1973-11-29 21:33:09.000000000 +0000 @@ -25,6 +25,36 @@ ts_parser__log(self); \ } +#define LOG_LOOKAHEAD(symbol_name, size) \ + if (self->lexer.logger.log || self->dot_graph_file) { \ + char *buf = self->lexer.debug_buffer; \ + const char *symbol = symbol_name; \ + int off = sprintf(buf, "lexed_lookahead sym:"); \ + for ( \ + int i = 0; \ + symbol[i] != '\0' \ + && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ + i++ \ + ) { \ + switch (symbol[i]) { \ + case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ + case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ + case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ + case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ + case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ + case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ + default: buf[off++] = symbol[i]; break; \ + } \ + } \ + snprintf( \ + buf + off, \ + TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ + ", size:%u", \ + size \ + ); \ + ts_parser__log(self); \ + } + #define LOG_STACK() \ if (self->dot_graph_file) { \ ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ @@ -373,6 +403,7 @@ bool found_external_token = false; bool error_mode = parse_state == ERROR_STATE; bool skipped_error = false; + bool called_get_column = false; int32_t first_error_character = 0; Length error_start_position = length_zero(); Length error_end_position = length_zero(); @@ -386,7 +417,7 @@ LOG( "lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, - current_position.extent.row + 1, + current_position.extent.row, current_position.extent.column ); ts_lexer_start(&self->lexer); @@ -415,6 +446,7 @@ (!error_mode && ts_stack_has_advanced_since_error(self->stack, version)) )) { found_external_token = true; + called_get_column = self->lexer.did_get_column; break; } @@ -424,7 +456,7 @@ LOG( "lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state, - current_position.extent.row + 1, + current_position.extent.row, current_position.extent.column ); ts_lexer_start(&self->lexer); @@ -477,11 +509,9 @@ self->language ); - LOG( - "lexed_lookahead sym:%s, size:%u, character:'%c'", + LOG_LOOKAHEAD( SYM_NAME(ts_subtree_symbol(result)), - ts_subtree_total_size(result).bytes, - first_error_character + ts_subtree_total_size(result).bytes ); } else { if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) { @@ -518,6 +548,7 @@ lookahead_bytes, parse_state, found_external_token, + called_get_column, is_keyword, self->language ); @@ -534,8 +565,7 @@ ); } - LOG( - "lexed_lookahead sym:%s, size:%u", + LOG_LOOKAHEAD( SYM_NAME(ts_subtree_symbol(result)), ts_subtree_total_size(result).bytes ); @@ -756,17 +786,15 @@ Subtree lookahead, bool extra ) { - Subtree subtree_to_push; - if (extra != ts_subtree_extra(lookahead)) { + bool is_leaf = ts_subtree_child_count(lookahead) == 0; + Subtree subtree_to_push = lookahead; + if (extra != ts_subtree_extra(lookahead) && is_leaf) { MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&result); + ts_subtree_set_extra(&result, extra); subtree_to_push = ts_subtree_from_mut(result); - } else { - subtree_to_push = lookahead; } - bool is_pending = ts_subtree_child_count(subtree_to_push) > 0; - ts_stack_push(self->stack, version, subtree_to_push, is_pending, state); + ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); if (ts_subtree_has_external_tokens(subtree_to_push)) { ts_stack_set_last_external_token( self->stack, version, ts_subtree_last_external_token(subtree_to_push) @@ -985,15 +1013,15 @@ switch (action.type) { case TSParseActionTypeShift: case TSParseActionTypeRecover: - if (!action.params.shift.extra && !action.params.shift.repetition) has_shift_action = true; + if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; break; case TSParseActionTypeReduce: - if (action.params.reduce.child_count > 0) + if (action.reduce.child_count > 0) ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ - .symbol = action.params.reduce.symbol, - .count = action.params.reduce.child_count, - .dynamic_precedence = action.params.reduce.dynamic_precedence, - .production_id = action.params.reduce.production_id, + .symbol = action.reduce.symbol, + .count = action.reduce.child_count, + .dynamic_precedence = action.reduce.dynamic_precedence, + .production_id = action.reduce.production_id, }); break; default: @@ -1284,9 +1312,9 @@ // be counted in error cost calculations. unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.shift.extra) { + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); - ts_subtree_set_extra(&mutable_lookahead); + ts_subtree_set_extra(&mutable_lookahead, true); lookahead = ts_subtree_from_mut(mutable_lookahead); } @@ -1414,17 +1442,13 @@ switch (action.type) { case TSParseActionTypeShift: { - if (action.params.shift.repetition) break; + if (action.shift.repetition) break; TSStateId next_state; - if (action.params.shift.extra) { - - // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out. - if (state == ERROR_STATE) continue; - + if (action.shift.extra) { next_state = state; LOG("shift_extra"); } else { - next_state = action.params.shift.state; + next_state = action.shift.state; LOG("shift state:%u", next_state); } @@ -1433,7 +1457,7 @@ next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); } - ts_parser__shift(self, version, next_state, lookahead, action.params.shift.extra); + ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); return true; } @@ -1441,10 +1465,10 @@ case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; bool end_of_non_terminal_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); StackVersion reduction_version = ts_parser__reduce( - self, version, action.params.reduce.symbol, action.params.reduce.child_count, - action.params.reduce.dynamic_precedence, action.params.reduce.production_id, + self, version, action.reduce.symbol, action.reduce.child_count, + action.reduce.dynamic_precedence, action.reduce.production_id, is_fragile, end_of_non_terminal_extra ); if (reduction_version != STACK_VERSION_NONE) { @@ -1858,7 +1882,7 @@ LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), ts_stack_state(self->stack, version), - ts_stack_position(self->stack, version).extent.row + 1, + ts_stack_position(self->stack, version).extent.row, ts_stack_position(self->stack, version).extent.column); if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; diff -Nru rust-tree-sitter-0.17.1/src/point.h rust-tree-sitter-0.20.1/src/point.h --- rust-tree-sitter-0.17.1/src/point.h 2019-10-21 16:28:43.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/point.h 1973-11-29 21:33:09.000000000 +0000 @@ -33,6 +33,10 @@ return (a.row < b.row) || (a.row == b.row && a.column < b.column); } +static inline bool point_gt(TSPoint a, TSPoint b) { + return (a.row > b.row) || (a.row == b.row && a.column > b.column); +} + static inline bool point_eq(TSPoint a, TSPoint b) { return a.row == b.row && a.column == b.column; } diff -Nru rust-tree-sitter-0.17.1/src/query.c rust-tree-sitter-0.20.1/src/query.c --- rust-tree-sitter-0.17.1/src/query.c 2020-11-02 21:51:28.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/query.c 1973-11-29 21:33:09.000000000 +0000 @@ -1,7 +1,6 @@ #include "tree_sitter/api.h" #include "./alloc.h" #include "./array.h" -#include "./bits.h" #include "./language.h" #include "./point.h" #include "./tree_cursor.h" @@ -9,13 +8,12 @@ #include // #define DEBUG_ANALYZE_QUERY -// #define LOG(...) fprintf(stderr, __VA_ARGS__) -#define LOG(...) +// #define DEBUG_EXECUTE_QUERY -#define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 #define MAX_STATE_PREDECESSOR_COUNT 100 -#define MAX_ANALYSIS_STATE_DEPTH 12 +#define MAX_ANALYSIS_STATE_DEPTH 8 +#define MAX_NEGATED_FIELD_COUNT 8 /* * Stream - A sequence of unicode characters derived from a UTF8 string. @@ -31,9 +29,9 @@ /* * QueryStep - A step in the process of matching a query. Each node within - * a query S-expression maps to one of these steps. An entire pattern is - * represented as a sequence of these steps. Fields: - * + * a query S-expression corresponds to one of these steps. An entire pattern + * is represented as a sequence of these steps. The basic properties of a + * node are represented by these fields: * - `symbol` - The grammar symbol to match. A zero value represents the * wildcard symbol, '_'. * - `field` - The field name to match. A zero value means that a field name @@ -42,23 +40,64 @@ * associated with this node in the pattern, terminated by a `NONE` value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. - * - `alternative_index` - The index of a different query step that serves as - * an alternative to this step. + * - `negated_field_list_id` - An id representing a set of fields that must + * that must not be present on a node matching this step. + * + * Steps have some additional fields in order to handle the `.` (or "anchor") operator, + * which forbids additional child nodes: + * - `is_immediate` - Indicates that the node matching this step cannot be preceded + * by other sibling nodes that weren't specified in the pattern. + * - `is_last_child` - Indicates that the node matching this step cannot have any + * subsequent named siblings. + * + * For simple patterns, steps are matched in sequential order. But in order to + * handle alternative/repeated/optional sub-patterns, query steps are not always + * structured as a linear sequence; they sometimes need to split and merge. This + * is done using the following fields: + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. A `NONE` value represents no alternative. + * When a query state reaches a step with an alternative index, the state + * is duplicated, with one copy remaining at the original step, and one copy + * moving to the alternative step. The alternative may have its own alternative + * step, so this splitting is an iterative process. + * - `is_dead_end` - Indicates that this state cannot be passed directly, and + * exists only in order to redirect to an alternative index, with no splitting. + * - `is_pass_through` - Indicates that state has no matching logic of its own, + * and exists only to split a state. One copy of the state advances immediately + * to the next step, and one moves to the alternative step. + * - `alternative_is_immediate` - Indicates that this step's alternative step + * should be treated as if `is_immediate` is true. + * + * Steps also store some derived state that summarizes how they relate to other + * steps within the same pattern. This is used to optimize the matching process: + * - `contains_captures` - Indicates that this step or one of its child steps + * has a non-empty `capture_ids` list. + * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then + * it and all of its subsequent sibling steps within the same parent pattern + * are guaranteed to match. + * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but + * for the entire top-level pattern. When iterating through a query's + * captures using `ts_query_cursor_next_capture`, this field is used to + * detect that a capture can safely be returned from a match that has not + * even completed yet. */ typedef struct { TSSymbol symbol; TSSymbol supertype_symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t alternative_index; uint16_t depth; - bool contains_captures: 1; + uint16_t alternative_index; + uint16_t negated_field_list_id; + bool is_named: 1; bool is_immediate: 1; bool is_last_child: 1; bool is_pass_through: 1; bool is_dead_end: 1; bool alternative_is_immediate: 1; - bool is_definite: 1; + bool contains_captures: 1; + bool root_pattern_guaranteed: 1; + bool parent_pattern_guaranteed: 1; } QueryStep; /* @@ -81,16 +120,20 @@ } SymbolTable; /* - * PatternEntry - Information about the starting point for matching a - * particular pattern, consisting of the index of the pattern within the query, - * and the index of the patter's first step in the shared `steps` array. These - * entries are stored in a 'pattern map' - a sorted array that makes it - * possible to efficiently lookup patterns based on the symbol for their first - * step. + * PatternEntry - Information about the starting point for matching a particular + * pattern. These entries are stored in a 'pattern map' - a sorted array that + * makes it possible to efficiently lookup patterns based on the symbol for their + * first step. The entry consists of the following fields: + * - `pattern_index` - the index of the pattern within the query + * - `step_index` - the index of the pattern's first step in the shared `steps` array + * - `is_rooted` - whether or not the pattern has a single root node. This property + * affects decisions about whether or not to start the pattern for nodes outside + * of a QueryCursor's range restriction. */ typedef struct { uint16_t step_index; uint16_t pattern_index; + bool is_rooted; } PatternEntry; typedef struct { @@ -126,14 +169,14 @@ * other states that have the same captures as this state, but are at * different steps in their pattern. This means that in order to obey the * 'longest-match' rule, this state should not be returned as a match until - * it is clear that there can be no longer match. + * it is clear that there can be no other alternative match with more captures. */ typedef struct { uint32_t id; + uint32_t capture_list_id; uint16_t start_depth; uint16_t step_index; uint16_t pattern_index; - uint16_t capture_list_id; uint16_t consumed_capture_count: 12; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; @@ -144,15 +187,23 @@ typedef Array(TSQueryCapture) CaptureList; /* - * CaptureListPool - A collection of *lists* of captures. Each QueryState - * needs to maintain its own list of captures. To avoid repeated allocations, - * the reuses a fixed set of capture lists, and keeps track of which ones - * are currently in use. + * CaptureListPool - A collection of *lists* of captures. Each query state needs + * to maintain its own list of captures. To avoid repeated allocations, this struct + * maintains a fixed set of capture lists, and keeps track of which ones are + * currently in use by a query state. */ typedef struct { - CaptureList list[MAX_CAPTURE_LIST_COUNT]; + Array(CaptureList) list; CaptureList empty_list; - uint32_t usage_map; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. + uint32_t free_capture_list_count; } CaptureListPool; /* @@ -196,6 +247,8 @@ /* * StatePredecessorMap - A map that stores the predecessors of each parse state. + * This is used during query analysis to determine which parse states can lead + * to which reduce actions. */ typedef struct { TSStateId *contents; @@ -214,10 +267,10 @@ Array(TSQueryPredicateStep) predicate_steps; Array(QueryPattern) patterns; Array(StepOffset) step_offsets; + Array(TSFieldId) negated_fields; Array(char) string_buffer; const TSLanguage *language; uint16_t wildcard_root_pattern_count; - TSSymbol *symbol_map; }; /* @@ -232,18 +285,18 @@ uint32_t depth; uint32_t start_byte; uint32_t end_byte; - uint32_t next_state_id; TSPoint start_point; TSPoint end_point; + uint32_t next_state_id; bool ascending; bool halted; + bool did_exceed_match_limit; }; static const TSQueryError PARENT_DONE = -1; static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; -static const TSSymbol NAMED_WILDCARD_SYMBOL = UINT16_MAX - 1; /********** * Stream @@ -331,54 +384,72 @@ static CaptureListPool capture_list_pool_new(void) { return (CaptureListPool) { + .list = array_new(), .empty_list = array_new(), - .usage_map = UINT32_MAX, + .max_capture_list_count = UINT32_MAX, + .free_capture_list_count = 0, }; } static void capture_list_pool_reset(CaptureListPool *self) { - self->usage_map = UINT32_MAX; - for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { - array_clear(&self->list[i]); + for (uint16_t i = 0; i < self->list.size; i++) { + // This invalid size means that the list is not in use. + self->list.contents[i].size = UINT32_MAX; } + self->free_capture_list_count = self->list.size; } static void capture_list_pool_delete(CaptureListPool *self) { - for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { - array_delete(&self->list[i]); + for (uint16_t i = 0; i < self->list.size; i++) { + array_delete(&self->list.contents[i]); } + array_delete(&self->list); } static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list; - return &self->list[id]; + if (id >= self->list.size) return &self->empty_list; + return &self->list.contents[id]; } static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - assert(id < MAX_CAPTURE_LIST_COUNT); - return &self->list[id]; + assert(id < self->list.size); + return &self->list.contents[id]; } static bool capture_list_pool_is_empty(const CaptureListPool *self) { - return self->usage_map == 0; + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. + return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; } static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // In the usage_map bitmask, ones represent free lists, and zeros represent - // lists that are in use. A free list id can quickly be found by counting - // the leading zeros in the usage map. An id of zero corresponds to the - // highest-order bit in the bitmask. - uint16_t id = count_leading_zeros(self->usage_map); - if (id >= MAX_CAPTURE_LIST_COUNT) return NONE; - self->usage_map &= ~bitmask_for_index(id); - array_clear(&self->list[id]); - return id; + // First see if any already allocated capture list is currently unused. + if (self->free_capture_list_count > 0) { + for (uint16_t i = 0; i < self->list.size; i++) { + if (self->list.contents[i].size == UINT32_MAX) { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } + } + } + + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. + uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) { + return NONE; + } + CaptureList list; + array_init(&list); + array_push(&self->list, list); + return i; } static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - if (id >= MAX_CAPTURE_LIST_COUNT) return; - array_clear(&self->list[id]); - self->usage_map |= bitmask_for_index(id); + if (id >= self->list.size) return; + self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; } /************** @@ -455,11 +526,13 @@ .field = 0, .capture_ids = {NONE, NONE, NONE}, .alternative_index = NONE, + .negated_field_list_id = 0, .contains_captures = false, .is_last_child = false, + .is_named = false, .is_pass_through = false, .is_dead_end = false, - .is_definite = false, + .root_pattern_guaranteed = false, .is_immediate = is_immediate, .alternative_is_immediate = false, }; @@ -493,9 +566,14 @@ * StatePredecessorMap **********************/ -static inline StatePredecessorMap state_predecessor_map_new(const TSLanguage *language) { +static inline StatePredecessorMap state_predecessor_map_new( + const TSLanguage *language +) { return (StatePredecessorMap) { - .contents = ts_calloc(language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), sizeof(TSStateId)), + .contents = ts_calloc( + language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), + sizeof(TSStateId) + ), }; } @@ -510,7 +588,10 @@ ) { unsigned index = state * (MAX_STATE_PREDECESSOR_COUNT + 1); TSStateId *count = &self->contents[index]; - if (*count == 0 || (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)) { + if ( + *count == 0 || + (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) + ) { (*count)++; self->contents[index + *count] = predecessor; } @@ -664,8 +745,7 @@ static inline void ts_query__pattern_map_insert( TSQuery *self, TSSymbol symbol, - uint32_t start_step_index, - uint32_t pattern_index + PatternEntry new_entry ) { uint32_t index; ts_query__pattern_map_search(self, symbol, &index); @@ -678,7 +758,7 @@ PatternEntry *entry = &self->pattern_map.contents[index]; if ( self->steps.contents[entry->step_index].symbol == symbol && - entry->pattern_index < pattern_index + entry->pattern_index < new_entry.pattern_index ) { index++; } else { @@ -686,32 +766,43 @@ } } - array_insert(&self->pattern_map, index, ((PatternEntry) { - .step_index = start_step_index, - .pattern_index = pattern_index, - })); + array_insert(&self->pattern_map, index, new_entry); } static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { - // Identify all of the patterns in the query that have child patterns, both at the - // top level and nested within other larger patterns. Record the step index where - // each pattern starts. + // Walk forward through all of the steps in the query, computing some + // basic information about each step. Mark all of the steps that contain + // captures, and record the indices of all of the steps that have child steps. Array(uint32_t) parent_step_indices = array_new(); for (unsigned i = 0; i < self->steps.size; i++) { QueryStep *step = &self->steps.contents[i]; - if (i + 1 < self->steps.size) { - QueryStep *next_step = &self->steps.contents[i + 1]; + if (step->depth == PATTERN_DONE_MARKER) { + step->parent_pattern_guaranteed = true; + step->root_pattern_guaranteed = true; + continue; + } + + bool has_children = false; + bool is_wildcard = step->symbol == WILDCARD_SYMBOL; + step->contains_captures = step->capture_ids[0] != NONE; + for (unsigned j = i + 1; j < self->steps.size; j++) { + QueryStep *next_step = &self->steps.contents[j]; if ( - step->symbol != WILDCARD_SYMBOL && - step->symbol != NAMED_WILDCARD_SYMBOL && - next_step->depth > step->depth && - next_step->depth != PATTERN_DONE_MARKER - ) { - array_push(&parent_step_indices, i); + next_step->depth == PATTERN_DONE_MARKER || + next_step->depth <= step->depth + ) break; + if (next_step->capture_ids[0] != NONE) { + step->contains_captures = true; } + if (!is_wildcard) { + next_step->root_pattern_guaranteed = true; + next_step->parent_pattern_guaranteed = true; + } + has_children = true; } - if (step->depth > 0) { - step->is_definite = true; + + if (has_children && !is_wildcard) { + array_push(&parent_step_indices, i); } } @@ -755,7 +846,7 @@ const TSSymbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, - action->params.reduce.symbol, + action->reduce.symbol, &aliases, &aliases_end ); @@ -772,20 +863,22 @@ if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, + .production_id = action->reduce.production_id, + .child_index = action->reduce.child_count, .done = true, })); } } } - } else if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - TSStateId next_state = action->params.shift.state; + } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { + TSStateId next_state = action->shift.state; state_predecessor_map_add(&predecessor_map, next_state, state); } } - } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != state) { - state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + } else if (lookahead_iterator.next_state != 0) { + if (lookahead_iterator.next_state != state) { + state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + } const TSSymbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, @@ -880,7 +973,7 @@ // For each non-terminal pattern, determine if the pattern can successfully match, // and identify all of the possible children within the pattern where matching could fail. - bool result = true; + bool all_patterns_are_valid = true; AnalysisStateSet states = array_new(); AnalysisStateSet next_states = array_new(); AnalysisStateSet deeper_states = array_new(); @@ -892,7 +985,7 @@ if (parent_symbol == ts_builtin_sym_error) continue; // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's - // root symbols is not a non-terminal, then return an error. + // root symbol is a terminal, then return an error. unsigned subgraph_index, exists; array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); if (!exists) { @@ -901,7 +994,7 @@ array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists); assert(exists); *error_offset = self->step_offsets.contents[i].byte_offset; - result = false; + all_patterns_are_valid = false; break; } @@ -959,6 +1052,10 @@ } #endif + // If no further progress can be made within the current recursion depth limit, then + // bump the depth limit by one, and continue to process the states the exceeded the + // limit. But only allow this if progress has been made since the last time the depth + // limit was increased. if (states.size == 0) { if (deeper_states.size > 0 && final_step_indices.size > prev_final_step_count) { #ifdef DEBUG_ANALYZE_QUERY @@ -1016,24 +1113,27 @@ while (ts_lookahead_iterator_next(&lookahead_iterator)) { TSSymbol sym = lookahead_iterator.symbol; - TSStateId next_parse_state; + AnalysisSubgraphNode successor = { + .state = parse_state, + .child_index = child_index, + }; if (lookahead_iterator.action_count) { const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - next_parse_state = action->params.shift.state; + if (action->type == TSParseActionTypeShift) { + if (!action->shift.extra) { + successor.state = action->shift.state; + successor.child_index++; + } } else { continue; } - } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != parse_state) { - next_parse_state = lookahead_iterator.next_state; + } else if (lookahead_iterator.next_state != 0) { + successor.state = lookahead_iterator.next_state; + successor.child_index++; } else { continue; } - AnalysisSubgraphNode successor = { - .state = next_parse_state, - .child_index = child_index + 1, - }; unsigned node_index; array_search_sorted_with( &subgraph->nodes, @@ -1064,20 +1164,25 @@ } } + // Create a new state that has advanced past this hypothetical subtree. AnalysisState next_state = *state; - analysis_state__top(&next_state)->child_index++; - analysis_state__top(&next_state)->parse_state = successor.state; - if (node->done) analysis_state__top(&next_state)->done = true; + AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); + next_state_top->child_index = successor.child_index; + next_state_top->parse_state = successor.state; + if (node->done) next_state_top->done = true; // Determine if this hypothetical child node would match the current step // of the query pattern. bool does_match = false; if (visible_symbol) { does_match = true; - if (step->symbol == NAMED_WILDCARD_SYMBOL) { - if (!self->language->symbol_metadata[visible_symbol].named) does_match = false; - } else if (step->symbol != WILDCARD_SYMBOL) { - if (step->symbol != visible_symbol) does_match = false; + if (step->symbol == WILDCARD_SYMBOL) { + if ( + step->is_named && + !self->language->symbol_metadata[visible_symbol].named + ) does_match = false; + } else if (step->symbol != visible_symbol) { + does_match = false; } if (step->field && step->field != field_id) { does_match = false; @@ -1088,20 +1193,31 @@ ) does_match = false; } - // If this is a hidden child, then push a new entry to the stack, in order to - // walk through the children of this child. + // If this child is hidden, then descend into it and walk through its children. + // If the top entry of the stack is at the end of its rule, then that entry can + // be replaced. Otherwise, push a new entry onto the stack. else if (sym >= self->language->token_count) { - if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { - did_exceed_max_depth = true; - continue; + if (!next_state_top->done) { + if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { + #ifdef DEBUG_ANALYZE_QUERY + printf("Exceeded depth limit for state %u\n", j); + #endif + + did_exceed_max_depth = true; + continue; + } + + next_state.depth++; + next_state_top = analysis_state__top(&next_state); } - next_state.depth++; - analysis_state__top(&next_state)->parse_state = parse_state; - analysis_state__top(&next_state)->child_index = 0; - analysis_state__top(&next_state)->parent_symbol = sym; - analysis_state__top(&next_state)->field_id = field_id; - analysis_state__top(&next_state)->done = false; + *next_state_top = (AnalysisStateEntry) { + .parse_state = parse_state, + .parent_symbol = sym, + .child_index = 0, + .field_id = field_id, + .done = false, + }; if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { array_insert_sorted_with(&deeper_states, analysis_state__compare, next_state); @@ -1110,8 +1226,9 @@ } // Pop from the stack when this state reached the end of its current syntax node. - while (next_state.depth > 0 && analysis_state__top(&next_state)->done) { + while (next_state.depth > 0 && next_state_top->done) { next_state.depth--; + next_state_top = analysis_state__top(&next_state); } // If this hypothetical child did match the current step of the query pattern, @@ -1127,11 +1244,23 @@ next_step->depth <= parent_depth + 1 ) break; } + } else if (successor.state == parse_state) { + continue; } for (;;) { - // If this state can make further progress, then add it to the states for the next iteration. - // Otherwise, record the fact that matching can fail at this step of the pattern. + // Skip pass-through states. Although these states have alternatives, they are only + // used to implement repetitions, and query analysis does not need to process + // repetitions in order to determine whether steps are possible and definite. + if (next_step->is_pass_through) { + next_state.step_index++; + next_step++; + continue; + } + + // If the pattern is finished or hypothetical parent node is complete, then + // record that matching can terminate at this step of the pattern. Otherwise, + // add this state to the list of states to process on the next iteration. if (!next_step->is_dead_end) { bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; if (did_finish_pattern) can_finish_pattern = true; @@ -1142,8 +1271,10 @@ } } - // If the state has advanced to a step with an alternative step, then add another state at - // that alternative step to the next iteration. + // If the state has advanced to a step with an alternative step, then add another state + // at that alternative step. This process is simpler than the process of actually matching a + // pattern during query exection, because for the purposes of query analysis, there is no + // need to process repetitions. if ( does_match && next_step->alternative_index != NONE && @@ -1174,7 +1305,8 @@ step->depth > parent_depth && !step->is_dead_end ) { - step->is_definite = false; + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; } } @@ -1186,21 +1318,22 @@ step->depth == PATTERN_DONE_MARKER ) break; if (!step->is_dead_end) { - step->is_definite = false; + step->parent_pattern_guaranteed = false; + step->root_pattern_guaranteed = false; } } } // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. - if (result && !can_finish_pattern && !did_exceed_max_depth) { + if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) { assert(final_step_indices.size > 0); uint16_t impossible_step_index = *array_back(&final_step_indices); uint32_t i, exists; array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); - assert(exists); + if (i >= self->step_offsets.size) i = self->step_offsets.size - 1; *error_offset = self->step_offsets.contents[i].byte_offset; - result = false; + all_patterns_are_valid = false; break; } } @@ -1236,25 +1369,27 @@ unsigned index, exists; array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); if (exists) { - step->is_definite = false; + step->root_pattern_guaranteed = false; break; } } } } - // Propagate indefiniteness backwards. + // Propagate fallibility. If a pattern is fallible at a given step, then it is + // fallible at all of its preceding steps. bool done = self->steps.size == 0; while (!done) { done = true; for (unsigned i = self->steps.size - 1; i > 0; i--) { QueryStep *step = &self->steps.contents[i]; + if (step->depth == PATTERN_DONE_MARKER) continue; // Determine if this step is definite or has definite alternatives. - bool is_definite = false; + bool parent_pattern_guaranteed = false; for (;;) { - if (step->is_definite) { - is_definite = true; + if (step->root_pattern_guaranteed) { + parent_pattern_guaranteed = true; break; } if (step->alternative_index == NONE || step->alternative_index < i) { @@ -1264,14 +1399,14 @@ } // If not, mark its predecessor as indefinite. - if (!is_definite) { + if (!parent_pattern_guaranteed) { QueryStep *prev_step = &self->steps.contents[i - 1]; if ( !prev_step->is_dead_end && prev_step->depth != PATTERN_DONE_MARKER && - prev_step->is_definite + prev_step->root_pattern_guaranteed ) { - prev_step->is_definite = false; + prev_step->root_pattern_guaranteed = false; done = false; } } @@ -1286,13 +1421,15 @@ printf(" %u: DONE\n", i); } else { printf( - " %u: {symbol: %s, field: %s, is_definite: %d}\n", + " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", i, - (step->symbol == WILDCARD_SYMBOL || step->symbol == NAMED_WILDCARD_SYMBOL) + (step->symbol == WILDCARD_SYMBOL) ? "ANY" : ts_language_symbol_name(self->language, step->symbol), (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), - step->is_definite + step->depth, + step->parent_pattern_guaranteed, + step->root_pattern_guaranteed ); } } @@ -1313,24 +1450,59 @@ array_delete(&predicate_capture_ids); state_predecessor_map_delete(&predecessor_map); - return result; + return all_patterns_are_valid; } -static void ts_query__finalize_steps(TSQuery *self) { - for (unsigned i = 0; i < self->steps.size; i++) { - QueryStep *step = &self->steps.contents[i]; - uint32_t depth = step->depth; - if (step->capture_ids[0] != NONE) { - step->contains_captures = true; - } else { - step->contains_captures = false; - for (unsigned j = i + 1; j < self->steps.size; j++) { - QueryStep *s = &self->steps.contents[j]; - if (s->depth == PATTERN_DONE_MARKER || s->depth <= depth) break; - if (s->capture_ids[0] != NONE) step->contains_captures = true; +static void ts_query__add_negated_fields( + TSQuery *self, + uint16_t step_index, + TSFieldId *field_ids, + uint16_t field_count +) { + QueryStep *step = &self->steps.contents[step_index]; + + // The negated field array stores a list of field lists, separated by zeros. + // Try to find the start index of an existing list that matches this new list. + bool failed_match = false; + unsigned match_count = 0; + unsigned start_i = 0; + for (unsigned i = 0; i < self->negated_fields.size; i++) { + TSFieldId existing_field_id = self->negated_fields.contents[i]; + + // At each zero value, terminate the match attempt. If we've exactly + // matched the new field list, then reuse this index. Otherwise, + // start over the matching process. + if (existing_field_id == 0) { + if (match_count == field_count) { + step->negated_field_list_id = start_i; + return; + } else { + start_i = i + 1; + match_count = 0; + failed_match = false; } } + + // If the existing list matches our new list so far, then advance + // to the next element of the new list. + else if ( + match_count < field_count && + existing_field_id == field_ids[match_count] && + !failed_match + ) { + match_count++; + } + + // Otherwise, this existing list has failed to match. + else { + match_count = 0; + failed_match = true; + } } + + step->negated_field_list_id = self->negated_fields.size; + array_extend(&self->negated_fields, field_count, field_ids); + array_push(&self->negated_fields, 0); } static TSQueryError ts_query__parse_string_literal( @@ -1531,10 +1703,14 @@ is_immediate ); - if (e == PARENT_DONE && stream->next == ']' && branch_step_indices.size > 0) { - stream_advance(stream); - break; - } else if (e) { + if (e == PARENT_DONE) { + if (stream->next == ']' && branch_step_indices.size > 0) { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; + } + if (e) { array_delete(&branch_step_indices); return e; } @@ -1582,12 +1758,14 @@ depth, child_is_immediate ); - if (e == PARENT_DONE && stream->next == ')') { - stream_advance(stream); - break; - } else if (e) { - return e; + if (e == PARENT_DONE) { + if (stream->next == ')') { + stream_advance(stream); + break; + } + e = TSQueryErrorSyntax; } + if (e) return e; child_is_immediate = false; } @@ -1603,15 +1781,8 @@ else { TSSymbol symbol; - // TODO - remove. - // For temporary backward compatibility, handle '*' as a wildcard. - if (stream->next == '*') { - symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; - stream_advance(stream); - } - // Parse a normal node name - else if (stream_is_ident_start(stream)) { + if (stream_is_ident_start(stream)) { const char *node_name = stream->input; stream_scan_identifier(stream); uint32_t length = stream->input - node_name; @@ -1625,7 +1796,7 @@ // Parse the wildcard symbol else if (length == 1 && node_name[0] == '_') { - symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL; + symbol = WILDCARD_SYMBOL; } else { @@ -1646,10 +1817,13 @@ // Add a step for the node. array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); + QueryStep *step = array_back(&self->steps); if (ts_language_symbol_metadata(self->language, symbol).supertype) { - QueryStep *step = array_back(&self->steps); step->supertype_symbol = step->symbol; - step->symbol = NAMED_WILDCARD_SYMBOL; + step->symbol = WILDCARD_SYMBOL; + } + if (symbol == WILDCARD_SYMBOL) { + step->is_named = true; } stream_skip_whitespace(stream); @@ -1664,7 +1838,6 @@ stream_scan_identifier(stream); uint32_t length = stream->input - node_name; - QueryStep *step = array_back(&self->steps); step->symbol = ts_language_symbol_for_name( self->language, node_name, @@ -1681,43 +1854,84 @@ // Parse the child patterns bool child_is_immediate = false; - uint16_t child_start_step_index = self->steps.size; + uint16_t last_child_step_index = 0; + uint16_t negated_field_count = 0; + TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; for (;;) { + // Parse a negated field assertion + if (stream->next == '!') { + stream_advance(stream); + stream_skip_whitespace(stream); + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - field_name; + stream_skip_whitespace(stream); + + TSFieldId field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + return TSQueryErrorField; + } + + // Keep the field ids sorted. + if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { + negated_field_ids[negated_field_count] = field_id; + negated_field_count++; + } + + continue; + } + + // Parse a sibling anchor if (stream->next == '.') { child_is_immediate = true; stream_advance(stream); stream_skip_whitespace(stream); } + uint16_t step_index = self->steps.size; TSQueryError e = ts_query__parse_pattern( self, stream, depth + 1, child_is_immediate ); - if (e == PARENT_DONE && stream->next == ')') { - if (child_is_immediate) { - self->steps.contents[child_start_step_index].is_last_child = true; + if (e == PARENT_DONE) { + if (stream->next == ')') { + if (child_is_immediate) { + if (last_child_step_index == 0) return TSQueryErrorSyntax; + self->steps.contents[last_child_step_index].is_last_child = true; + } + + if (negated_field_count) { + ts_query__add_negated_fields( + self, + starting_step_index, + negated_field_ids, + negated_field_count + ); + } + + stream_advance(stream); + break; } - stream_advance(stream); - break; - } else if (e) { - return e; + e = TSQueryErrorSyntax; } + if (e) return e; + last_child_step_index = step_index; child_is_immediate = false; } } } // Parse a wildcard pattern - else if ( - stream->next == '_' || - - // TODO remove. - // For temporary backward compatibility, handle '*' as a wildcard. - stream->next == '*' - ) { + else if (stream->next == '_') { stream_advance(stream); stream_skip_whitespace(stream); @@ -1806,8 +2020,6 @@ // Parse suffixes modifiers for this pattern for (;;) { - QueryStep *step = &self->steps.contents[starting_step_index]; - // Parse the one-or-more operator. if (stream->next == '+') { stream_advance(stream); @@ -1831,6 +2043,7 @@ repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); + QueryStep *step = &self->steps.contents[starting_step_index]; while (step->alternative_index != NONE) { step = &self->steps.contents[step->alternative_index]; } @@ -1842,6 +2055,7 @@ stream_advance(stream); stream_skip_whitespace(stream); + QueryStep *step = &self->steps.contents[starting_step_index]; while (step->alternative_index != NONE) { step = &self->steps.contents[step->alternative_index]; } @@ -1866,6 +2080,7 @@ uint32_t step_index = starting_step_index; for (;;) { + QueryStep *step = &self->steps.contents[step_index]; query_step__add_capture(step, capture_id); if ( step->alternative_index != NONE && @@ -1896,31 +2111,13 @@ uint32_t *error_offset, TSQueryError *error_type ) { - TSSymbol *symbol_map; - if (ts_language_version(language) >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - symbol_map = NULL; - } else { - // Work around the fact that multiple symbols can currently be - // associated with the same name, due to "simple aliases". - // In the next language ABI version, this map will be contained - // in the language's `public_symbol_map` field. - uint32_t symbol_count = ts_language_symbol_count(language); - symbol_map = ts_malloc(sizeof(TSSymbol) * symbol_count); - for (unsigned i = 0; i < symbol_count; i++) { - const char *name = ts_language_symbol_name(language, i); - const TSSymbolType symbol_type = ts_language_symbol_type(language, i); - - symbol_map[i] = i; - - for (unsigned j = 0; j < i; j++) { - if (ts_language_symbol_type(language, j) == symbol_type) { - if (!strcmp(name, ts_language_symbol_name(language, j))) { - symbol_map[i] = j; - break; - } - } - } - } + if ( + !language || + language->version > TREE_SITTER_LANGUAGE_VERSION || + language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + ) { + *error_type = TSQueryErrorLanguage; + return NULL; } TSQuery *self = ts_malloc(sizeof(TSQuery)); @@ -1933,11 +2130,13 @@ .patterns = array_new(), .step_offsets = array_new(), .string_buffer = array_new(), - .symbol_map = symbol_map, + .negated_fields = array_new(), .wildcard_root_pattern_count = 0, .language = language, }; + array_push(&self->negated_fields, 0); + // Parse all of the S-expressions in the given string. Stream stream = stream_new(source, source_len); stream_skip_whitespace(&stream); @@ -1975,7 +2174,7 @@ // then optimize the matching process by skipping matching the wildcard. // Later, during the matching process, the query cursor will check that // there is a parent node, and capture it if necessary. - if (step->symbol == WILDCARD_SYMBOL && step->depth == 0) { + if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { QueryStep *second_step = &self->steps.contents[start_step_index + 1]; if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1) { wildcard_root_alternative_index = step->alternative_index; @@ -1984,7 +2183,24 @@ } } - ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); + // Determine whether the pattern has a single root node. This affects + // decisions about whether or not to start matching the pattern when + // a query cursor has a range restriction. + bool is_rooted = true; + uint32_t start_depth = step->depth; + for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { + QueryStep *step = &self->steps.contents[step_index]; + if (step->depth == start_depth) { + is_rooted = false; + break; + } + } + + ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { + .step_index = start_step_index, + .pattern_index = pattern_index, + .is_rooted = is_rooted + }); if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; } @@ -2003,15 +2219,12 @@ } } - if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { - if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorStructure; - ts_query_delete(self); - return NULL; - } + if (!ts_query__analyze_patterns(self, error_offset)) { + *error_type = TSQueryErrorStructure; + ts_query_delete(self); + return NULL; } - ts_query__finalize_steps(self); array_delete(&self->string_buffer); return self; } @@ -2024,9 +2237,9 @@ array_delete(&self->patterns); array_delete(&self->step_offsets); array_delete(&self->string_buffer); + array_delete(&self->negated_fields); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); - ts_free(self->symbol_map); ts_free(self); } } @@ -2079,7 +2292,7 @@ return self->patterns.contents[pattern_index].start_byte; } -bool ts_query_step_is_definite( +bool ts_query_is_pattern_guaranteed_at_step( const TSQuery *self, uint32_t byte_offset ) { @@ -2090,12 +2303,26 @@ step_index = step_offset->step_index; } if (step_index < self->steps.size) { - return self->steps.contents[step_index].is_definite; + return self->steps.contents[step_index].root_pattern_guaranteed; } else { return false; } } +bool ts_query__step_is_fallible( + const TSQuery *self, + uint16_t step_index +) { + assert((uint32_t)step_index + 1 < self->steps.size); + QueryStep *step = &self->steps.contents[step_index]; + QueryStep *next_step = &self->steps.contents[step_index + 1]; + return ( + next_step->depth != PATTERN_DONE_MARKER && + next_step->depth > step->depth && + !next_step->parent_pattern_guaranteed + ); +} + void ts_query_disable_capture( TSQuery *self, const char *name, @@ -2109,7 +2336,6 @@ QueryStep *step = &self->steps.contents[i]; query_step__remove_capture(step, id); } - ts_query__finalize_steps(self); } } @@ -2135,6 +2361,7 @@ TSQueryCursor *ts_query_cursor_new(void) { TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); *self = (TSQueryCursor) { + .did_exceed_match_limit = false, .ascending = false, .halted = false, .states = array_new(), @@ -2158,6 +2385,18 @@ ts_free(self); } +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { + return self->did_exceed_match_limit; +} + +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { + self->capture_list_pool.max_capture_list_count = limit; +} + void ts_query_cursor_exec( TSQueryCursor *self, const TSQuery *query, @@ -2172,6 +2411,7 @@ self->ascending = false; self->halted = false; self->query = query; + self->did_exceed_match_limit = false; } void ts_query_cursor_set_byte_range( @@ -2180,7 +2420,6 @@ uint32_t end_byte ) { if (end_byte == 0) { - start_byte = 0; end_byte = UINT32_MAX; } self->start_byte = start_byte; @@ -2193,7 +2432,6 @@ TSPoint end_point ) { if (end_point.row == 0 && end_point.column == 0) { - start_point = POINT_ZERO; end_point = POINT_MAX; } self->start_point = start_point; @@ -2207,38 +2445,51 @@ uint32_t *state_index, uint32_t *byte_offset, uint32_t *pattern_index, - bool *is_definite + bool *root_pattern_guaranteed ) { bool result = false; *state_index = UINT32_MAX; *byte_offset = UINT32_MAX; *pattern_index = UINT32_MAX; for (unsigned i = 0; i < self->states.size; i++) { - const QueryState *state = &self->states.contents[i]; + QueryState *state = &self->states.contents[i]; if (state->dead) continue; + const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); - if (captures->size > state->consumed_capture_count) { - uint32_t capture_byte = ts_node_start_byte(captures->contents[state->consumed_capture_count].node); - if ( - !result || - capture_byte < *byte_offset || - (capture_byte == *byte_offset && state->pattern_index < *pattern_index) - ) { - QueryStep *step = &self->query->steps.contents[state->step_index]; - if (is_definite) { - *is_definite = step->is_definite; - } else if (step->is_definite) { - continue; - } + if (state->consumed_capture_count >= captures->size) { + continue; + } - result = true; - *state_index = i; - *byte_offset = capture_byte; - *pattern_index = state->pattern_index; + TSNode node = captures->contents[state->consumed_capture_count].node; + if ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ) { + state->consumed_capture_count++; + i--; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + !result || + node_start_byte < *byte_offset || + (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) + ) { + QueryStep *step = &self->query->steps.contents[state->step_index]; + if (root_pattern_guaranteed) { + *root_pattern_guaranteed = step->root_pattern_guaranteed; + } else if (step->root_pattern_guaranteed) { + continue; } + + result = true; + *state_index = i; + *byte_offset = node_start_byte; + *pattern_index = state->pattern_index; } } return result; @@ -2317,6 +2568,12 @@ } } +#ifdef DEBUG_EXECUTE_QUERY +#define LOG(...) fprintf(stderr, __VA_ARGS__) +#else +#define LOG(...) +#endif + static void ts_query_cursor__add_state( TSQueryCursor *self, const PatternEntry *pattern @@ -2348,12 +2605,13 @@ QueryState *prev_state = &self->states.contents[index - 1]; if (prev_state->start_depth < start_depth) break; if (prev_state->start_depth == start_depth) { - if (prev_state->pattern_index < pattern->pattern_index) break; - if (prev_state->pattern_index == pattern->pattern_index) { - // Avoid inserting an unnecessary duplicate state, which would be - // immediately pruned by the longest-match criteria. - if (prev_state->step_index == pattern->step_index) return; - } + // Avoid inserting an unnecessary duplicate state, which would be + // immediately pruned by the longest-match criteria. + if ( + prev_state->pattern_index == pattern->pattern_index && + prev_state->step_index == pattern->step_index + ) return; + if (prev_state->pattern_index <= pattern->pattern_index) break; } index--; } @@ -2364,6 +2622,7 @@ pattern->step_index ); array_insert(&self->states, index, ((QueryState) { + .id = UINT32_MAX, .capture_list_id = NONE, .step_index = pattern->step_index, .pattern_index = pattern->pattern_index, @@ -2391,6 +2650,7 @@ // state has captured the earliest node in the document, and steal its // capture list. if (state->capture_list_id == NONE) { + self->did_exceed_match_limit = true; uint32_t state_index, byte_offset, pattern_index; if ( ts_query_cursor__first_in_progress_capture( @@ -2504,7 +2764,11 @@ // Exit the current node. if (self->ascending) { - LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor))); + LOG( + "leave node. depth:%u, type:%s\n", + self->depth, + ts_node_type(ts_tree_cursor_current_node(&self->cursor)) + ); // Leave this node by stepping to its next sibling or to its parent. if (ts_tree_cursor_goto_next_sibling(&self->cursor)) { @@ -2512,7 +2776,7 @@ } else if (ts_tree_cursor_goto_parent(&self->cursor)) { self->depth--; } else { - LOG("halt at root"); + LOG("halt at root\n"); self->halted = true; } @@ -2527,7 +2791,6 @@ if (step->depth == PATTERN_DONE_MARKER) { if (state->start_depth > self->depth || self->halted) { LOG(" finish pattern %u\n", state->pattern_index); - state->id = self->next_state_id++; array_push(&self->finished_states, *state); did_match = true; deleted_count++; @@ -2560,34 +2823,11 @@ // Enter a new node. else { - // If this node is before the selected range, then avoid descending into it. - TSNode node = ts_tree_cursor_current_node(&self->cursor); - if ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ) { - if (!ts_tree_cursor_goto_next_sibling(&self->cursor)) { - self->ascending = true; - } - continue; - } - - // If this node is after the selected range, then stop walking. - if ( - self->end_byte <= ts_node_start_byte(node) || - point_lte(self->end_point, ts_node_start_point(node)) - ) { - LOG("halt at end of range"); - self->halted = true; - continue; - } - // Get the properties of the current node. + TSNode node = ts_tree_cursor_current_node(&self->cursor); + TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); TSSymbol symbol = ts_node_symbol(node); bool is_named = ts_node_is_named(node); - if (symbol != ts_builtin_sym_error && self->query->symbol_map) { - symbol = self->query->symbol_map[symbol]; - } bool has_later_siblings; bool has_later_named_siblings; bool can_have_later_siblings_with_this_field; @@ -2604,7 +2844,8 @@ &supertype_count ); LOG( - "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", + "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", + self->depth, ts_node_type(node), ts_language_field_name_for_id(self->query->language, field_id), ts_node_start_point(node).row, @@ -2612,28 +2853,51 @@ self->finished_states.size ); - // Add new states for any patterns whose root node is a wildcard. + bool node_intersects_range = ( + ts_node_end_byte(node) > self->start_byte && + ts_node_start_byte(node) < self->end_byte && + point_gt(ts_node_end_point(node), self->start_point) && + point_lt(ts_node_start_point(node), self->end_point) + ); + + bool parent_intersects_range = ts_node_is_null(parent_node) || ( + ts_node_end_byte(parent_node) > self->start_byte && + ts_node_start_byte(parent_node) < self->end_byte && + point_gt(ts_node_end_point(parent_node), self->start_point) && + point_lt(ts_node_start_point(parent_node), self->end_point) + ); + + // Add new states for any patterns whose root node is a wildcard. for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { PatternEntry *pattern = &self->query->pattern_map.contents[i]; - QueryStep *step = &self->query->steps.contents[pattern->step_index]; // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. - if (step->field && field_id != step->field) continue; - if (step->supertype_symbol && !supertype_count) continue; - ts_query_cursor__add_state(self, pattern); + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + if ( + (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) && + (!step->field || field_id == step->field) && + (!step->supertype_symbol || supertype_count > 0) + ) { + ts_query_cursor__add_state(self, pattern); + } } // Add new states for any patterns whose root node matches this node. unsigned i; if (ts_query__pattern_map_search(self->query, symbol, &i)) { PatternEntry *pattern = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[pattern->step_index]; do { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. - if (step->field && field_id != step->field) continue; - ts_query_cursor__add_state(self, pattern); + if ( + (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) && + (!step->field || field_id == step->field) + ) { + ts_query_cursor__add_state(self, pattern); + } // Advance to the next pattern whose root node matches this node. i++; @@ -2657,10 +2921,12 @@ // Determine if this node matches this step of the pattern, and also // if this node can have later siblings that match this step of the // pattern. - bool node_does_match = - step->symbol == symbol || - step->symbol == WILDCARD_SYMBOL || - (step->symbol == NAMED_WILDCARD_SYMBOL && is_named); + bool node_does_match = false; + if (step->symbol == WILDCARD_SYMBOL) { + node_does_match = is_named || !step->is_named; + } else { + node_does_match = symbol == step->symbol; + } bool later_sibling_can_match = has_later_siblings; if ((step->is_immediate && is_named) || state->seeking_immediate_match) { later_sibling_can_match = false; @@ -2688,6 +2954,22 @@ } } + if (step->negated_field_list_id) { + TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; + for (;;) { + TSFieldId negated_field_id = *negated_field_ids; + if (negated_field_id) { + negated_field_ids++; + if (ts_node_child_by_field_id(node, negated_field_id).id) { + node_does_match = false; + break; + } + } else { + break; + } + } + } + // Remove states immediately if it is ever clear that they cannot match. if (!node_does_match) { if (!later_sibling_can_match) { @@ -2711,7 +2993,10 @@ // parent, then this query state cannot simply be updated in place. It must be // split into two states: one that matches this node, and one which skips over // this node, to preserve the possibility of matching later siblings. - if (later_sibling_can_match && step->contains_captures) { + if (later_sibling_can_match && ( + step->contains_captures || + ts_query__step_is_fallible(self->query, state->step_index) + )) { if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", @@ -2773,7 +3058,7 @@ ); QueryStep *next_step = &self->query->steps.contents[state->step_index]; - if (stop_on_definite_step && next_step->is_definite) did_match = true; + if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; // If this state's next step has an alternative step, then copy the state in order // to pursue both alternatives. The alternative step itself may have an alternative, @@ -2884,7 +3169,7 @@ } } - // If there the state is at the end of its pattern, remove it from the list + // If the state is at the end of its pattern, remove it from the list // of in-progress states and add it to the list of finished states. if (!did_remove) { LOG( @@ -2900,7 +3185,6 @@ LOG(" defer finishing pattern %u\n", state->pattern_index); } else { LOG(" finish pattern %u\n", state->pattern_index); - state->id = self->next_state_id++; array_push(&self->finished_states, *state); array_erase(&self->states, state - self->states.contents); did_match = true; @@ -2910,8 +3194,32 @@ } } - // Continue descending if possible. - if (ts_tree_cursor_goto_first_child(&self->cursor)) { + // When the current node ends prior to the desired start offset, + // only descend for the purpose of continuing in-progress matches. + bool should_descend = node_intersects_range; + if (!should_descend) { + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i];; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if ( + next_step->depth != PATTERN_DONE_MARKER && + state->start_depth + next_step->depth > self->depth + ) { + should_descend = true; + break; + } + } + } + + if (!should_descend) { + LOG( + " not descending. node end byte: %u, start byte: %u\n", + ts_node_end_byte(node), + self->start_byte + ); + } + + if (should_descend && ts_tree_cursor_goto_first_child(&self->cursor)) { self->depth++; } else { self->ascending = true; @@ -2931,6 +3239,7 @@ } QueryState *state = &self->finished_states.contents[0]; + if (state->id == UINT32_MAX) state->id = self->next_state_id++; match->id = state->id; match->pattern_index = state->pattern_index; const CaptureList *captures = capture_list_pool_get( @@ -2988,35 +3297,43 @@ QueryState *first_finished_state = NULL; uint32_t first_finished_capture_byte = first_unfinished_capture_byte; uint32_t first_finished_pattern_index = first_unfinished_pattern_index; - for (unsigned i = 0; i < self->finished_states.size; i++) { + for (unsigned i = 0; i < self->finished_states.size;) { QueryState *state = &self->finished_states.contents[i]; const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); - if (captures->size > state->consumed_capture_count) { - uint32_t capture_byte = ts_node_start_byte( - captures->contents[state->consumed_capture_count].node - ); - if ( - capture_byte < first_finished_capture_byte || - ( - capture_byte == first_finished_capture_byte && - state->pattern_index < first_finished_pattern_index - ) - ) { - first_finished_state = state; - first_finished_capture_byte = capture_byte; - first_finished_pattern_index = state->pattern_index; - } - } else { + + // Remove states whose captures are all consumed. + if (state->consumed_capture_count >= captures->size) { capture_list_pool_release( &self->capture_list_pool, state->capture_list_id ); array_erase(&self->finished_states, i); - i--; + continue; + } + + // Skip captures that precede the cursor's start byte. + TSNode node = captures->contents[state->consumed_capture_count].node; + if (ts_node_end_byte(node) <= self->start_byte) { + state->consumed_capture_count++; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + node_start_byte < first_finished_capture_byte || + ( + node_start_byte == first_finished_capture_byte && + state->pattern_index < first_finished_pattern_index + ) + ) { + first_finished_state = state; + first_finished_capture_byte = node_start_byte; + first_finished_pattern_index = state->pattern_index; } + i++; } // If there is finished capture that is clearly before any unfinished @@ -3032,6 +3349,7 @@ } if (state) { + if (state->id == UINT32_MAX) state->id = self->next_state_id++; match->id = state->id; match->pattern_index = state->pattern_index; const CaptureList *captures = capture_list_pool_get( diff -Nru rust-tree-sitter-0.17.1/src/stack.c rust-tree-sitter-0.20.1/src/stack.c --- rust-tree-sitter-0.17.1/src/stack.c 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/stack.c 1973-11-29 21:33:09.000000000 +0000 @@ -737,7 +737,7 @@ if (head->status == StackStatusHalted) continue; fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf(f, "node_head_%u -> node_%p [", i, head->node); + fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); if (head->status == StackStatusPaused) { fprintf(f, "color=red "); @@ -782,7 +782,7 @@ if (!node) continue; all_iterators_done = false; - fprintf(f, "node_%p [", node); + fprintf(f, "node_%p [", (void *)node); if (node->state == ERROR_STATE) { fprintf(f, "label=\"?\""); } else if ( @@ -807,7 +807,7 @@ for (int j = 0; j < node->link_count; j++) { StackLink link = node->links[j]; - fprintf(f, "node_%p -> node_%p [", node, link.node); + fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); if (link.is_pending) fprintf(f, "style=dashed "); if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); diff -Nru rust-tree-sitter-0.17.1/src/subtree.c rust-tree-sitter-0.20.1/src/subtree.c --- rust-tree-sitter-0.17.1/src/subtree.c 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/subtree.c 1973-11-29 21:33:09.000000000 +0000 @@ -166,7 +166,8 @@ Subtree ts_subtree_new_leaf( SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens, + uint32_t lookahead_bytes, TSStateId parse_state, + bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); @@ -213,6 +214,7 @@ .fragile_right = false, .has_changes = false, .has_external_tokens = has_external_tokens, + .depends_on_column = depends_on_column, .is_missing = false, .is_keyword = is_keyword, {{.first_leaf = {.symbol = 0, .parse_state = 0}}} @@ -245,7 +247,7 @@ ) { Subtree result = ts_subtree_new_leaf( pool, ts_builtin_sym_error, padding, size, bytes_scanned, - parse_state, false, false, language + parse_state, false, false, false, language ); SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; data->fragile_left = true; @@ -378,6 +380,7 @@ self.ptr->repeat_depth = 0; self.ptr->node_count = 1; self.ptr->has_external_tokens = false; + self.ptr->depends_on_column = false; self.ptr->dynamic_precedence = 0; uint32_t structural_index = 0; @@ -388,6 +391,13 @@ for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = children[i]; + if ( + self.ptr->size.extent.row == 0 && + ts_subtree_depends_on_column(child) + ) { + self.ptr->depends_on_column = true; + } + if (i == 0) { self.ptr->padding = ts_subtree_padding(child); self.ptr->size = ts_subtree_size(child); @@ -545,7 +555,7 @@ ) { Subtree result = ts_subtree_new_leaf( pool, symbol, padding, length_zero(), 0, - 0, false, false, language + 0, false, false, false, language ); if (result.data.is_inline) { result.data.is_missing = true; @@ -670,6 +680,7 @@ Edit edit = entry.edit; bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); Length size = ts_subtree_size(*entry.tree); Length padding = ts_subtree_padding(*entry.tree); @@ -733,6 +744,7 @@ data->fragile_right = false; data->has_changes = false; data->has_external_tokens = false; + data->depends_on_column = false; data->is_missing = result.data.is_missing; data->is_keyword = result.data.is_keyword; result.ptr = data; @@ -755,9 +767,18 @@ // If this child ends before the edit, it is not affected. if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - // If this child starts after the edit, then we're done processing children. - if (child_left.bytes > edit.old_end.bytes || - (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break; + // Keep editing child nodes until a node is reached that starts after the edit. + // Also, if this node's validity depends on its column position, then continue + // invaliditing child nodes until reaching a line break. + if (( + (child_left.bytes > edit.old_end.bytes) || + (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) + ) && ( + !invalidate_first_row || + child_left.extent.row > entry.tree->ptr->padding.extent.row + )) { + break; + } // Transform edit into the child's coordinate space. Edit child_edit = { @@ -775,8 +796,10 @@ // Interpret all inserted text as applying to the *first* child that touches the edit. // Subsequent children are only never have any text inserted into them; they are only // shrunk to compensate for the edit. - if (child_right.bytes > edit.start.bytes || - (child_right.bytes == edit.start.bytes && is_pure_insertion)) { + if ( + child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && is_pure_insertion) + ) { edit.new_end = edit.start; } @@ -969,7 +992,7 @@ TSSymbol subtree_symbol = ts_subtree_symbol(*self); TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol; uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); - fprintf(f, "tree_%p [label=\"", self); + fprintf(f, "tree_%p [label=\"", (void *)self); ts_subtree__write_dot_string(f, ts_language_symbol_name(language, symbol)); fprintf(f, "\""); @@ -981,12 +1004,14 @@ "state: %d\n" "error-cost: %u\n" "has-changes: %u\n" + "depends-on-column: %u\n" "repeat-depth: %u\n" "lookahead-bytes: %u", start_offset, end_offset, ts_subtree_parse_state(*self), ts_subtree_error_cost(*self), ts_subtree_has_changes(*self), + ts_subtree_depends_on_column(*self), ts_subtree_repeat_depth(*self), ts_subtree_lookahead_bytes(*self) ); @@ -1009,7 +1034,7 @@ child_info_offset++; } ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); - fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); + fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); child_start_offset += ts_subtree_total_bytes(*child); } } diff -Nru rust-tree-sitter-0.17.1/src/subtree.h rust-tree-sitter-0.20.1/src/subtree.h --- rust-tree-sitter-0.17.1/src/subtree.h 2020-10-28 19:29:45.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/subtree.h 1973-11-29 21:33:09.000000000 +0000 @@ -78,6 +78,7 @@ bool fragile_right : 1; bool has_changes : 1; bool has_external_tokens : 1; + bool depends_on_column: 1; bool is_missing : 1; bool is_keyword : 1; @@ -138,7 +139,7 @@ Subtree ts_subtree_new_leaf( SubtreePool *, TSSymbol, Length, Length, uint32_t, - TSStateId, bool, bool, const TSLanguage * + TSStateId, bool, bool, bool, const TSLanguage * ); Subtree ts_subtree_new_error( SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * @@ -186,11 +187,11 @@ #define ts_subtree_children(self) \ ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) -static inline void ts_subtree_set_extra(MutableSubtree *self) { +static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { if (self->data.is_inline) { - self->data.extra = true; + self->data.extra = is_extra; } else { - self->ptr->extra = true; + self->ptr->extra = is_extra; } } @@ -284,6 +285,10 @@ return self.data.is_inline ? false : self.ptr->has_external_tokens; } +static inline bool ts_subtree_depends_on_column(Subtree self) { + return self.data.is_inline ? false : self.ptr->depends_on_column; +} + static inline bool ts_subtree_is_fragile(Subtree self) { return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); } diff -Nru rust-tree-sitter-0.17.1/src/tree.c rust-tree-sitter-0.20.1/src/tree.c --- rust-tree-sitter-0.17.1/src/tree.c 2020-05-15 22:35:39.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/tree.c 1973-11-29 21:33:09.000000000 +0000 @@ -5,8 +5,6 @@ #include "./tree_cursor.h" #include "./tree.h" -static const unsigned PARENT_CACHE_CAPACITY = 32; - TSTree *ts_tree_new( Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count @@ -14,9 +12,6 @@ TSTree *result = ts_malloc(sizeof(TSTree)); result->root = root; result->language = language; - result->parent_cache = NULL; - result->parent_cache_start = 0; - result->parent_cache_size = 0; result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); result->included_range_count = included_range_count; @@ -35,7 +30,6 @@ ts_subtree_release(&pool, self->root); ts_subtree_pool_delete(&pool); ts_free(self->included_ranges); - if (self->parent_cache) ts_free(self->parent_cache); ts_free(self); } @@ -78,8 +72,6 @@ SubtreePool pool = ts_subtree_pool_new(0); self->root = ts_subtree_edit(self->root, edit, &pool); - self->parent_cache_start = 0; - self->parent_cache_size = 0; ts_subtree_pool_delete(&pool); } @@ -111,38 +103,3 @@ void ts_tree_print_dot_graph(const TSTree *self, FILE *file) { ts_subtree_print_dot_graph(self->root, self->language, file); } - -TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) { - for (uint32_t i = 0; i < self->parent_cache_size; i++) { - uint32_t index = (self->parent_cache_start + i) % PARENT_CACHE_CAPACITY; - ParentCacheEntry *entry = &self->parent_cache[index]; - if (entry->child == node->id) { - return ts_node_new(self, entry->parent, entry->position, entry->alias_symbol); - } - } - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) { - TSTree *self = (TSTree *)_self; - if (!self->parent_cache) { - self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry)); - } - - uint32_t index = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY; - self->parent_cache[index] = (ParentCacheEntry) { - .child = node->id, - .parent = (const Subtree *)parent->id, - .position = { - parent->context[0], - {parent->context[1], parent->context[2]} - }, - .alias_symbol = parent->context[3], - }; - - if (self->parent_cache_size == PARENT_CACHE_CAPACITY) { - self->parent_cache_start++; - } else { - self->parent_cache_size++; - } -} diff -Nru rust-tree-sitter-0.17.1/src/tree_cursor.c rust-tree-sitter-0.20.1/src/tree_cursor.c --- rust-tree-sitter-0.17.1/src/tree_cursor.c 2020-11-02 22:06:09.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/tree_cursor.c 1973-11-29 21:33:09.000000000 +0000 @@ -159,10 +159,43 @@ } } while (did_descend); - if (self->stack.size > initial_size && - ts_tree_cursor_goto_next_sibling((TSTreeCursor *)self)) { - return visible_child_index; - } + self->stack.size = initial_size; + return -1; +} + +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *_self, TSPoint goal_point) { + TreeCursor *self = (TreeCursor *)_self; + uint32_t initial_size = self->stack.size; + uint32_t visible_child_index = 0; + + bool did_descend; + do { + did_descend = false; + + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + TSPoint end_point = point_add(entry.position.extent, ts_subtree_size(*entry.subtree).extent); + bool at_goal = point_gt(end_point, goal_point); + uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); + if (at_goal) { + if (visible) { + array_push(&self->stack, entry); + return visible_child_index; + } + if (visible_child_count > 0) { + array_push(&self->stack, entry); + did_descend = true; + break; + } + } else if (visible) { + visible_child_index++; + } else { + visible_child_index += visible_child_count; + } + } + } while (did_descend); self->stack.size = initial_size; return -1; @@ -353,14 +386,12 @@ // Determine if the current node can have later siblings with the same field name. if (*field_id) { for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (i->field_id == *field_id) { - if ( - i->child_index > entry->structural_child_index || - (i->child_index == entry->structural_child_index && *has_later_named_siblings) - ) { - *can_have_later_siblings_with_this_field = true; - break; - } + if ( + i->field_id == *field_id && + i->child_index > entry->structural_child_index + ) { + *can_have_later_siblings_with_this_field = true; + break; } } } @@ -448,6 +479,7 @@ TSTreeCursor res = {NULL, NULL, {0, 0}}; TreeCursor *copy = (TreeCursor *)&res; copy->tree = cursor->tree; + array_init(©->stack); array_push_all(©->stack, &cursor->stack); return res; } diff -Nru rust-tree-sitter-0.17.1/src/tree.h rust-tree-sitter-0.20.1/src/tree.h --- rust-tree-sitter-0.17.1/src/tree.h 2019-02-05 17:57:59.000000000 +0000 +++ rust-tree-sitter-0.20.1/src/tree.h 1973-11-29 21:33:09.000000000 +0000 @@ -15,17 +15,12 @@ struct TSTree { Subtree root; const TSLanguage *language; - ParentCacheEntry *parent_cache; - uint32_t parent_cache_start; - uint32_t parent_cache_size; TSRange *included_ranges; unsigned included_range_count; }; TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned); TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); -TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *); -void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *); #ifdef __cplusplus }