diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..7aa760c6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,21 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## [Unreleased] +- Changed all APIs to use Emacs's 1-based byte positions instead of 0-based byte offsets. +- Changed all APIs to use Emacs's 1-based line numbers instead of 0-based row numbering. +- Changed representation of tree-sitter point from 2-element vector to cons cell. +- Changed representation of query match/capture from 2-element vector to cons cell. + +## [0.2.0] - 2020-02-02 +- Upgraded `tree-sitter` to 0.6.0. +- Added `tree-sitter-cli`. +- Added `tree-sitter-langs` (utilities to download pre-compiled modules and grammars). + +## [0.1.0] - 2020-01-27 +Initial release + +[Unreleased]: https://github.com/ubolonton/emacs-tree-sitter/compare/0.2.0...HEAD +[0.2.0]: https://github.com/ubolonton/emacs-tree-sitter/compare/0.1.0...0.2.0 diff --git a/Cargo.lock b/Cargo.lock index 7fefdd91..32d72cb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,7 +163,7 @@ dependencies = [ [[package]] name = "emacs" -version = "0.12.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", @@ -188,7 +188,7 @@ dependencies = [ name = "emacs-tree-sitter" version = "0.1.0" dependencies = [ - "emacs 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", + "emacs 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -540,7 +540,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum darling 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" "checksum darling_core 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" "checksum darling_macro 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" -"checksum emacs 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05ea8eac275435f7f184f27150efa7c2be95d00b39c335d03d784475797e115b" +"checksum emacs 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)" = "25591ded546fabae8566bd79ba8934ea7b8220944a767eb1f81996d9345a3524" "checksum emacs-macros 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37ba1dc2c46a026b429de25063571ab49a348a8319ddde7ceb892b7a0a032424" "checksum emacs_module 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a1a2ddc45eb97ac4ffba3dc6a6f547c38debdf9a9473bf46223b57b69e8a740" "checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" diff --git a/Cargo.toml b/Cargo.toml index bd39c94e..806d038f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ name = "tree_sitter_dyn" crate-type = ["cdylib"] [dependencies] -emacs = "0.12.1" +emacs = "0.12.3" libloading = "0.5.1" tree-sitter = "0.6.0" diff --git a/README.md b/README.md index ceaba8e4..49ec1b64 100644 --- a/README.md +++ b/README.md @@ -74,11 +74,17 @@ If you want to hack on `emacs-tree-sitter` itself, see the section [Setup for De ### Types - `language`, `parser`, `tree`, `node`, `cursor`, `query`: corresponding tree-sitter types, embedded in `user-ptr` objects. -- `point`: a vector in the form of `[row column]`, where `row` and `column` are zero-based. This is different from Emacs's concept of "point". Also note that `column` counts bytes, unlike the current built-in function `current-column`. 
-- `range`: a vector in the form of `[start-point end-point]`. +- `point`: a pair of `(LINE-NUMBER . BYTE-COLUMN)`. + + `LINE-NUMBER` is the absolute line number returned by `line-number-at-pos`, counting from 1. + + `BYTE-COLUMN` counts from 0, like `current-column`. However, unlike that function, it counts bytes, instead of displayed glyphs. +- `range`: a vector in the form of `[START-BYTEPOS END-BYTEPOS START-POINT END-POINT]`. These types are understood only by this package. They are not recognized by `type-of`, but have corresponding type-checking predicates, which are useful for debugging: `ts-language-p`, `ts-tree-p`, `ts-node-p`... +For consistency with Emacs's conventions, this binding has some differences compared to the tree-sitter's C/Rust APIs: +- It uses 1-based byte position, not 0-based byte offset. +- It uses 1-based line number, not 0-based row. + ### Functions - Language: diff --git a/src/cursor.rs b/src/cursor.rs index 8d443051..13b62d94 100644 --- a/src/cursor.rs +++ b/src/cursor.rs @@ -53,12 +53,12 @@ fn current_field_name(cursor: Value) -> Result { } macro_rules! defun_cursor_walks { - ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident: $itype:ty ),* ) )? -> $type:ty)*) => { + ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident $($into:ident)? : $itype:ty ),* ) )? -> $type:ty)*) => { $( $(#[$meta])* #[defun$((name = $lisp_name))?] fn $name(cursor: &mut RCursor, $( $( $param: $itype ),* )? ) -> Result<$type> { - Ok(cursor.borrow_mut().$name( $( $( $param ),* )? )) + Ok(cursor.borrow_mut().$name( $( $( $param $(.$into())? ),* )? )) } )* }; @@ -77,9 +77,9 @@ defun_cursor_walks! { /// Return t if CURSOR successfully moved, nil if there was no next sibling node. fn goto_next_sibling -> bool - /// Move CURSOR to the first child that extends beyond the given byte offset. + /// Move CURSOR to the first child that extends beyond the given BYTEPOS. 
/// Return the index of the child node if one was found, nil otherwise. - "goto-first-child-for-byte" fn goto_first_child_for_byte(index: usize) -> Option + "goto-first-child-for-byte" fn goto_first_child_for_byte(bytepos into: BytePos) -> Option } /// Re-initialize CURSOR to start at a different NODE. diff --git a/src/node.rs b/src/node.rs index d460ed43..a50693a6 100644 --- a/src/node.rs +++ b/src/node.rs @@ -69,19 +69,19 @@ defun_node_props! { // Position ------------------------------------------------------------------------------------ - /// Return NODE's start byte. - "node-start-byte" fn start_byte -> usize + /// Return NODE's start byte position. + "node-start-byte" fn start_byte -> BytePos; into - /// Return NODE's start point, as a `[ROW COLUMN]' vector. + /// Return NODE's start point, in the form of (LINE-NUMBER . BYTE-COLUMN). "node-start-point" fn start_position -> Point; into - /// Return NODE's end byte. - "node-end-byte" fn end_byte -> usize + /// Return NODE's end byte position. + "node-end-byte" fn end_byte -> BytePos; into - /// Return NODE's end point, as a `[ROW COLUMN]' vector. + /// Return NODE's end point, in the form of (LINE-NUMBER . BYTE-COLUMN). "node-end-point" fn end_position -> Point; into - /// Return NODE's `[START-BYTE END-BYTE START-POINT END-POINT]'. + /// Return a vector of NODE's [START-BYTEPOS END-BYTEPOS START-POINT END-POINT]. "node-range" fn range -> Range; into // Counting child nodes ------------------------------------------------------------------------ @@ -116,10 +116,10 @@ defun_node_navs! { // Child --------------------------------------------------------------------------------------- - /// Return NODE's child at the given zero-based index. + /// Return NODE's child at the given 0-based index. "get-nth-child" fn child(i: usize) - /// Return NODE's named child at the given zero-based index. + /// Return NODE's named child at the given 0-based index. 
"get-nth-named-child" fn named_child(i: usize) /// Return NODE's child with the given FIELD-NAME. @@ -144,16 +144,18 @@ defun_node_navs! { // Descendant ---------------------------------------------------------------------------------- - /// Return the smallest node within NODE that spans the given range of bytes. - "get-descendant-for-byte-range" fn descendant_for_byte_range(start: usize, end: usize) + /// Return the smallest node within NODE that spans the given range of byte + /// positions. + "get-descendant-for-byte-range" fn descendant_for_byte_range(start into: BytePos, end into: BytePos) - /// Return the smallest node within NODE that spans the given range of points. + /// Return the smallest node within NODE that spans the given point range. "get-descendant-for-point-range" fn descendant_for_point_range(start into: Point, end into: Point) - /// Return the smallest named node within NODE that spans the given range of bytes. - "get-named-descendant-for-byte-range" fn named_descendant_for_byte_range(start: usize, end: usize) + /// Return the smallest named node within NODE that spans the given range of byte + /// positions. + "get-named-descendant-for-byte-range" fn named_descendant_for_byte_range(start into: BytePos, end into: BytePos) - /// Return the smallest named node within NODE that spans the given range of points. + /// Return the smallest named node within NODE that spans the given point range. "get-named-descendant-for-point-range" fn named_descendant_for_point_range(start into: Point, end into: Point) } @@ -164,26 +166,33 @@ defun_node_props! { /// Edit NODE to keep it in sync with source code that has been edited. /// +/// You must describe the edit both in terms of byte positions and in terms of +/// (LINE-NUMBER . BYTE-COLUMN) coordinates. +/// +/// LINE-NUMBER should be the number returned by `line-number-at-pos', which counts +/// from 1. +/// +/// BYTE-COLUMN should count from 0, like Emacs's `current-column'. 
However, unlike +/// that function, it should count bytes, instead of displayed glyphs. +/// /// This function is only rarely needed. When you edit a syntax tree, all of the /// nodes that you retrieve from the tree afterward will already reflect the edit. /// You only need to use this function when you have a node that you want to keep /// and continue to use after an edit. -/// -/// Note that indexing must be zero-based. #[defun] fn edit_node( node: &mut RNode, - start_byte: usize, - old_end_byte: usize, - new_end_byte: usize, + start_bytepos: BytePos, + old_end_bytepos: BytePos, + new_end_bytepos: BytePos, start_point: Point, old_end_point: Point, new_end_point: Point, ) -> Result<()> { let edit = InputEdit { - start_byte, - old_end_byte, - new_end_byte, + start_byte: start_bytepos.into(), + old_end_byte: old_end_bytepos.into(), + new_end_byte: new_end_bytepos.into(), start_position: start_point.into(), old_end_position: old_end_point.into(), new_end_position: new_end_point.into(), diff --git a/src/parser.rs b/src/parser.rs index 31276b4c..9c802d56 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ use emacs::{defun, Result, Value, Vector}; use emacs::failure; -use tree_sitter::{Parser, Point, Tree}; +use tree_sitter::{Parser, Tree}; use crate::types::*; @@ -34,18 +34,22 @@ fn language(parser: &Parser) -> Result> { /// Parse source code generated by INPUT-FUNCTION with PARSER, returning a tree. /// -/// INPUT-FUNCTION should take 3 parameters: (BYTE-OFFSET ROW COLUMN), and return a -/// fragment of the source code, starting from the position identified by either -/// BYTE-OFFSET or [ROW COLUMN]. +/// INPUT-FUNCTION should take 3 parameters: (BYTEPOS LINE-NUMBER BYTE-COLUMN), and +/// return a fragment of the source code, starting from the position identified by +/// either BYTEPOS or (LINE-NUMBER . BYTE-COLUMN). +/// +/// BYTEPOS is Emacs's 1-based byte position. 
+/// +/// LINE-NUMBER is the number returned by `line-number-at-pos', which counts from 1. +/// +/// BYTE-COLUMN counts from 0, like Emacs's `current-column'. However, unlike that +/// function, it counts bytes, instead of displayed glyphs. /// /// If you have already parsed an earlier version of this document, and it has since /// been edited, pass the previously parsed OLD-TREE so that its unchanged parts can /// be reused. This will save time and memory. For this to work correctly, you must /// have already edited it using `ts-edit-tree' function in a way that exactly /// matches the source code changes. -/// -/// Note that indexing is assumed to be zero-based, while Emacs normally uses -/// one-based indexing for accessing buffer content. #[defun] fn parse(parser: &mut Parser, input_function: Value, old_tree: Option<&Shared>) -> Result> { let old_tree = match old_tree { @@ -60,8 +64,10 @@ fn parse(parser: &mut Parser, input_function: Value, old_tree: Option<&Shared String { - input_function.call((byte, position.row, position.column)) + let input = &mut |byte: usize, point: tree_sitter::Point| -> String { + let bytepos: BytePos = byte.into(); + let point: Point = point.into(); + input_function.call((bytepos, point.line_number(), point.byte_column())) .and_then(|v| v.into_rust()) .unwrap_or_else(|e| { input_error = Some(e); diff --git a/src/query.rs b/src/query.rs index 6230bfb9..58d6e422 100644 --- a/src/query.rs +++ b/src/query.rs @@ -28,20 +28,20 @@ fn _make_query(language: Language, source: String) -> Result { } macro_rules! defun_query_methods { - ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident : $type:ty ),* ) )? -> $rtype:ty )*) => { + ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident : $type:ty ),* ) )? -> $rtype:ty $(; $into:ident)? )*) => { $( #[defun$((name = $lisp_name))?] $(#[$meta])* fn $name(query: &Query, $( $( $param : $type ),* )? 
) -> Result<$rtype> { - Ok(query.$name( $( $( $param ),* )? )) + Ok(query.$name( $( $( $param ),* )? )$(.$into())?) } )* }; } defun_query_methods! { - /// Return the byte offset where the NTH pattern starts in QUERY's source. - "query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> usize + /// Return the byte position where the NTH pattern starts in QUERY's source. + "query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> BytePos; into /// Return the number of patterns in QUERY. "query-count-patterns" fn pattern_count -> usize @@ -120,13 +120,13 @@ fn _query_cursor_matches<'e>( for (ci, c) in m.captures.iter().enumerate() { let captured_node = node.map(|_| c.node); let capture = if index_only.is_some() { - env.vector((c.index, captured_node))? + env.cons(c.index, captured_node)? } else { - env.vector((&capture_names[c.index as usize], captured_node))? + env.cons(&capture_names[c.index as usize], captured_node)? }; captures.set(ci, capture)?; } - let _match = env.vector((m.pattern_index, captures))?; + let _match = env.cons(m.pattern_index, captures)?; vec.push(_match); } vec_to_vector(env, vec) @@ -156,23 +156,26 @@ fn _query_cursor_captures<'e>( let c = m.captures[capture_index]; let captured_node = node.map(|_| c.node); let capture = if index_only.is_some() { - env.vector((c.index, captured_node))? + env.cons(c.index, captured_node)? } else { - env.vector((&capture_names[c.index as usize], captured_node))? + env.cons(&capture_names[c.index as usize], captured_node)? }; vec.push(capture); } vec_to_vector(env, vec) } -/// Limit CURSOR's query executions to the byte range [BEG END]. +/// Limit CURSOR's query executions to the range of byte positions, from BEG to END. 
#[defun] -fn set_byte_range(cursor: &mut QueryCursor, beg: usize, end: usize) -> Result<()> { - cursor.set_byte_range(beg, end); +fn set_byte_range(cursor: &mut QueryCursor, beg: BytePos, end: BytePos) -> Result<()> { + cursor.set_byte_range(beg.into(), end.into()); Ok(()) } -/// Limit CURSOR's query executions to the point range [BEG END]. +/// Limit CURSOR's query executions to the point range, from BEG to END. +/// +/// A "point" in this context is a (LINE-NUMBER . BYTE-COLUMN) pair. See `ts-parse' +/// for a more detailed explanation. #[defun] fn set_point_range(cursor: &mut QueryCursor, beg: Point, end: Point) -> Result<()> { cursor.set_point_range(beg.into(), end.into()); diff --git a/src/tree.rs b/src/tree.rs index a48f6c86..a1ed9f4a 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -24,22 +24,28 @@ fn root_node(tree: Borrowed) -> Result { /// Edit the syntax TREE to keep it in sync with source code that has been edited. /// -/// You must describe the edit both in terms of byte offsets and in terms of -/// `[ROW COLUMN]' coordinates, using zero-based indexing. +/// You must describe the edit both in terms of byte positions and in terms of +/// (LINE-NUMBER . BYTE-COLUMN) coordinates. +/// +/// LINE-NUMBER should be the number returned by `line-number-at-pos', which counts +/// from 1. +/// +/// BYTE-COLUMN should count from 0, like Emacs's `current-column'. However, unlike +/// that function, it should count bytes, instead of displayed glyphs. 
#[defun] fn edit_tree( tree: Borrowed, - start_byte: usize, - old_end_byte: usize, - new_end_byte: usize, + start_bytepos: BytePos, + old_end_bytepos: BytePos, + new_end_bytepos: BytePos, start_point: Point, old_end_point: Point, new_end_point: Point, ) -> Result<()> { let edit = InputEdit { - start_byte, - old_end_byte, - new_end_byte, + start_byte: start_bytepos.into(), + old_end_byte: old_end_bytepos.into(), + new_end_byte: new_end_bytepos.into(), start_position: start_point.into(), old_end_position: old_end_point.into(), new_end_position: new_end_point.into(), @@ -48,8 +54,6 @@ fn edit_tree( Ok(()) } -// TODO: walk_with_properties - /// Compare an edited OLD-TREE to NEW-TREE, both representing the same document. /// /// This function returns a vector of ranges whose syntactic structure has changed. diff --git a/src/types.rs b/src/types.rs index cfa1dc0a..690847a6 100644 --- a/src/types.rs +++ b/src/types.rs @@ -50,20 +50,64 @@ impl_newtype_traits!(Point); impl IntoLisp<'_> for Point { fn into_lisp(self, env: &Env) -> Result { - let inner = self.0; - env.call("vector", (inner.row, inner.column)) + env.cons(self.line_number(), self.byte_column()) } } impl FromLisp<'_> for Point { fn from_lisp(value: Value) -> Result { - let vector = Vector(value); - let row = vector.get(0)?; - let column = vector.get(1)?; + let row = value.car::()? - 1; + let column = value.cdr()?; Ok(tree_sitter::Point { row, column }.into()) } } +impl Point { + #[inline(always)] + pub(crate) fn line_number(&self) -> usize { + self.0.row + 1 + } + + #[inline(always)] + pub(crate) fn byte_column(&self) -> usize { + self.0.column + } +} + +// ------------------------------------------------------------------------------------------------- +// Emacs Byte Position (1-based, which is different from byte offset, which is 0-based). 
+ +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct BytePos(usize); + +impl From for BytePos { + #[inline(always)] + fn from(byte_offset: usize) -> Self { + Self(byte_offset + 1) + } +} + +impl Into for BytePos { + #[inline(always)] + fn into(self) -> usize { + self.0 - 1 + } +} + +impl FromLisp<'_> for BytePos { + #[inline(always)] + fn from_lisp(value: Value) -> Result { + value.into_rust().map(Self) + } +} + +impl IntoLisp<'_> for BytePos { + #[inline(always)] + fn into_lisp(self, env: &Env) -> Result { + self.0.into_lisp(env) + } +} + // ------------------------------------------------------------------------------------------------- // Range @@ -75,9 +119,11 @@ impl_newtype_traits!(Range); impl IntoLisp<'_> for Range { fn into_lisp(self, env: &Env) -> Result { let inner = self.0; + let start_byte_pos: BytePos = inner.start_byte.into(); + let end_byte_pos: BytePos = inner.end_byte.into(); env.call("vector", ( - inner.start_byte, - inner.end_byte, + start_byte_pos, + end_byte_pos, Point(inner.start_point), Point(inner.end_point), )) @@ -87,10 +133,10 @@ impl IntoLisp<'_> for Range { impl FromLisp<'_> for Range { fn from_lisp(value: Value) -> Result { let vector = Vector(value); - let start_byte = vector.get(0)?; - let end_byte = vector.get(1)?; - let start_point = vector.get::(2)?.0; - let end_point = vector.get::(3)?.0; + let start_byte = vector.get::(0)?.into(); + let end_byte = vector.get::(1)?.into(); + let start_point = vector.get::(2)?.into(); + let end_point = vector.get::(3)?.into(); Ok(tree_sitter::Range { start_byte, end_byte, start_point, end_point }.into()) } } diff --git a/tree-sitter-core.el b/tree-sitter-core.el index 17de0fea..1e5871b1 100644 --- a/tree-sitter-core.el +++ b/tree-sitter-core.el @@ -51,67 +51,55 @@ ;;; Type conversion. -(defsubst ts-byte-from-position (position) - "Return tree-sitter (0-based) byte offset for character at POSITION." 
- (- (position-bytes position) 1)) - -(defsubst ts-byte-to-position (byte) - "Return the character position for tree-sitter (0-based) BYTE offset." - (byte-to-position (1+ byte))) - (defun ts-point-from-position (position) - "Convert POSITION to a valid (0-based indexed) tree-sitter point. -The returned column counts bytes, which is different from `current-column'." + "Convert POSITION to a valid tree-sitter point. + +A \"point\" in this context is a (LINE-NUMBER . BYTE-COLUMN) pair. See `ts-parse' +for a more detailed explanation." (ts--save-context (ts--point-from-position position))) (defun ts--point-from-position (position) - "Convert POSITION to a valid (0-based indexed) tree-sitter point. -Prefer `ts-byte-to-position', unless there's a real performance bottleneck. + "Convert POSITION to a valid tree-sitter point. +Prefer `ts-point-from-position', unless there's a real performance bottleneck. This function must be called within a `ts--save-context' block." (goto-char position) - (let ((row (- (line-number-at-pos position) 1)) + (let ((line-number (line-number-at-pos position)) ;; TODO: Add tests that fail if `current-column' is used instead. - (column (- (position-bytes position) - (position-bytes (line-beginning-position))))) - (vector row column))) + (byte-column (- (position-bytes position) + (position-bytes (line-beginning-position))))) + (cons line-number byte-column))) (defun ts-point-to-position (point) - "Convert tree-sitter POINT to buffer position." + "Convert tree-sitter POINT to buffer position. + +A \"point\" in this context is a (LINE-NUMBER . BYTE-COLUMN) pair. See `ts-parse' +for a more detailed explanation." 
(ts--save-context - (let ((row (aref point 0)) - (column (aref point 1))) + (let ((line-number (car point)) + (byte-column (cdr point))) (goto-char 1) - (forward-line row) - (ts-byte-to-position (+ column (ts-byte-from-position (line-beginning-position))))))) + (forward-line (- line-number 1)) + (byte-to-position (+ byte-column (position-bytes (line-beginning-position))))))) ;;; Extracting buffer's text. -(defsubst ts-buffer-substring (beg-byte end-byte) - "Return the current buffer's text between (0-based) BEG-BYTE and END-BYTE. -This function must be called with narrowing disabled, e.g. within a -`ts--without-restriction' block." - (buffer-substring-no-properties - (ts-byte-to-position beg-byte) - (ts-byte-to-position end-byte))) - -(defun ts-buffer-input (byte _row _column) - "Return a portion of the current buffer's text, starting from BYTE. -BYTE is zero-based, and is automatically clamped to the range valid for the -current buffer. +(defun ts-buffer-input (bytepos _line-number _byte-column) + "Return a portion of the current buffer's text, starting from BYTEPOS. +BYTEPOS is automatically clamped to the range valid for the current buffer. This function must be called with narrowing disabled, e.g. within a `ts--without-restriction' block." - (let* ((max-position (point-max)) - (beg-byte (max 0 byte)) + (let* ((max-pos (point-max)) + (beg-byte (max 1 bytepos)) ;; ;; TODO: Don't hard-code read length. (end-byte (+ 1024 beg-byte)) - ;; nil means > max-position, since we already made sure they are non-negative. - (start (or (ts-byte-to-position beg-byte) max-position)) - (end (or (ts-byte-to-position end-byte) max-position))) - (buffer-substring-no-properties start end))) + ;; nil means > max-pos, since we already made sure they are non-negative. 
+ (beg-pos (or (byte-to-position beg-byte) max-pos)) + (end-pos (or (byte-to-position end-byte) max-pos))) + (buffer-substring-no-properties beg-pos end-pos))) (defun ts--node-text (node) "Return NODE's text, assuming it's from the current buffer's syntax tree. @@ -119,7 +107,9 @@ Prefer `ts-node-text', unless there's a real bottleneck. This function must be called within a `ts--without-restriction' block." (pcase-let ((`[,beg ,end] (ts-node-range node))) - (ts-buffer-substring beg end))) + (buffer-substring-no-properties + (byte-to-position beg) + (byte-to-position end)))) (defun ts-node-text (node) "Return NODE's text, assuming it's from the current buffer's syntax tree." @@ -133,28 +123,28 @@ This function must be called within a `ts--without-restriction' block." "Return the smallest node within NODE that spans the position range [BEG END]." (ts-get-descendant-for-byte-range node - (ts-byte-from-position beg) - (ts-byte-from-position end))) + (position-bytes beg) + (position-bytes end))) (defun ts-get-named-descendant-for-position-range (node beg end) "Return the smallest named node within NODE that spans the position range [BEG END]." (ts-get-named-descendant-for-byte-range node - (ts-byte-from-position beg) - (ts-byte-from-position end))) + (position-bytes beg) + (position-bytes end))) (defun ts-node-start-position (node) "Return NODE's start position." - (ts-byte-to-position (ts-node-start-byte node))) + (byte-to-position (ts-node-start-byte node))) (defun ts-node-end-position (node) "Return NODE's end position." - (ts-byte-to-position (ts-node-end-byte node))) + (byte-to-position (ts-node-end-byte node))) (defun ts-goto-first-child-for-position (cursor position) "Move CURSOR to the first child that extends beyond the given POSITION. Return the index of the child node if one was found, nil otherwise." 
- (ts-goto-first-child-for-byte cursor (ts-byte-from-position position))) + (ts-goto-first-child-for-byte cursor (position-bytes position))) ;;; Language loading mechanism. @@ -212,7 +202,7 @@ parsed with LANGUAGE." "Execute QUERY on NODE and return a vector of matches. Matches are sorted in the order they were found. -Each match is a `[PATTERN-INDEX MATCH-CAPTURES]' vector, where PATTERN-INDEX is +Each match has the form (PATTERN-INDEX . MATCH-CAPTURES), where PATTERN-INDEX is the position of the matched pattern within QUERY, and MATCH-CAPTURES is a vector of captures by the match, similar to that returned by `ts-query-captures'. If the optional arg INDEX-ONLY is non-nil, positions of the capture patterns within @@ -230,7 +220,7 @@ Otherwise `ts-node-text' is used." "Execute QUERY on NODE and return a vector of captures. Matches are sorted in the order they appear. -Each capture is a `[CAPTURE-NAME CAPTURED-NODE]' vector. If the optional arg +Each capture has the form (CAPTURE-NAME . CAPTURED-NODE). If the optional arg INDEX-ONLY is non-nil, the position of the capture pattern within QUERY is returned instead of its name. diff --git a/tree-sitter-tests.el b/tree-sitter-tests.el index 09684915..ee1b35a2 100644 --- a/tree-sitter-tests.el +++ b/tree-sitter-tests.el @@ -134,7 +134,7 @@ tree is held (since nodes internally reference the tree)." (ert-info ("Testing buffer boundaries") (let ((min (point-min)) (max (point-max))) - (should (equal [0 0] (ts-point-from-position min))) + (should (equal '(1 . 0) (ts-point-from-position min))) (should (= min (ts-point-to-position (ts-point-from-position min)))) (should (= max (ts-point-to-position (ts-point-from-position max)))))) (ert-info ("Testing arbitrary points") @@ -142,32 +142,6 @@ tree is held (since nodes internally reference the tree)." 
(let ((p (1+ (random (buffer-size))))) (should (= p (ts-point-to-position (ts-point-from-position p))))))))) -(ert-deftest conversion::position<->ts-byte () - (ts-test-with-temp-buffer "tree-sitter-tests.el" - ;; Some non-ascii texts to exercise this test: - ;; Nguyễn Tuấn Anh - ;; Нгуен Туан Ань - ;; 阮俊英 - (ert-info ("Testing buffer boundaries") - (let ((min (point-min)) - (max (point-max))) - (should (equal 0 (ts-byte-from-position min))) - (should (> (ts-byte-from-position max) (buffer-size))) - (should (= min (ts-byte-to-position (ts-byte-from-position min)))) - (should (= max (ts-byte-to-position (ts-byte-from-position max)))))) - (ert-info ("Testing arbitrary points") - (dotimes (_ 100) - (let* ((p0 (1+ (random (buffer-size)))) - (p1 (1+ (random (buffer-size)))) - (b0 (ts-byte-from-position p0)) - (b1 (ts-byte-from-position p1))) - (should (>= (1+ b0) p0)) - (should (= p0 (ts-byte-to-position b0))) - (ert-info ("Checking substrings") - (should (equal - (buffer-substring-no-properties (min p0 p1) (max p0 p1)) - (ts-buffer-substring (min b0 b1) (max b0 b1)))))))))) - (ert-deftest buffer-input::non-ascii-characters () (with-temp-buffer (insert "\"Tuấn-Anh Nguyễn\";") @@ -207,11 +181,11 @@ tree is held (since nodes internally reference the tree)." (match? @function \"make_query\")) (macro_definition (identifier) @macro)")) (node-texts (mapcar (lambda (capture) - (pcase-let ((`[_ ,node] capture)) + (pcase-let ((`(_ . ,node) capture)) (ts-node-text node))) captures)) (capture-names (mapcar (lambda (capture) - (pcase-let ((`[,name node] capture)) name)) + (pcase-let ((`(,name . _) capture)) name)) captures))) (ert-info ("Should match specified functions and not more") (should (member "_make_query" node-texts)) diff --git a/tree-sitter.el b/tree-sitter.el index 1e25d4a8..11fa6655 100644 --- a/tree-sitter.el +++ b/tree-sitter.el @@ -70,19 +70,19 @@ tree-sitter CLI." 
(defvar-local tree-sitter-language nil "Tree-sitter language.") -(defvar-local tree-sitter--start-byte 0) -(defvar-local tree-sitter--old-end-byte 0) -(defvar-local tree-sitter--new-end-byte 0) +(defvar-local tree-sitter--start-byte nil) +(defvar-local tree-sitter--old-end-byte nil) +(defvar-local tree-sitter--new-end-byte nil) -(defvar-local tree-sitter--start-point [0 0]) -(defvar-local tree-sitter--old-end-point [0 0]) -(defvar-local tree-sitter--new-end-point [0 0]) +(defvar-local tree-sitter--start-point nil) +(defvar-local tree-sitter--old-end-point nil) +(defvar-local tree-sitter--new-end-point nil) (defun tree-sitter--before-change (beg end) "Update relevant editing states. Installed on `before-change-functions'. BEG and END are the begin and end of the text to be changed." - (setq tree-sitter--start-byte (ts-byte-from-position beg) - tree-sitter--old-end-byte (ts-byte-from-position end)) + (setq tree-sitter--start-byte (position-bytes beg) + tree-sitter--old-end-byte (position-bytes end)) (ts--save-context ;; TODO: Keep mutating the same vectors instead of creating a new one each time. (setq tree-sitter--start-point (ts--point-from-position beg) @@ -108,7 +108,7 @@ BEG and END are the begin and end of the text to be changed." Installed on `after-change-functions'. END is the end of the changed text." - (setq tree-sitter--new-end-byte (ts-byte-from-position end) + (setq tree-sitter--new-end-byte (position-bytes end) tree-sitter--new-end-point (ts-point-from-position end)) (when tree-sitter-tree (ts-edit-tree tree-sitter-tree