Merge pull request #26 from ubolonton/type-revamp

Type Revamp
emacs-tree-sitter · Feb 20, 2020 · b8d1c2a · b8d1c2a
2 parents a558c3e + 236e38e
commit b8d1c2a
Show file tree

Hide file tree

Showing 13 changed files with 222 additions and 163 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,21 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+
+## [Unreleased]
+- Changed all APIs to use Emacs's 1-based byte positions instead of 0-based byte offsets.
+- Changed all APIs to use Emacs's 1-based line numbers instead of 0-based row numbering.
+- Changed representation of tree-sitter point from 2-element vector to cons cell.
+- Changed representation of query match/capture from 2-element vector to cons cell.
+
+## [0.2.0] - 2020-02-02
+- Upgraded `tree-sitter` to 0.6.0.
+- Added `tree-sitter-cli`.
+- Added `tree-sitter-langs` (utilities to download pre-compiled modules and grammars).
+
+## [0.1.0] - 2020-01-27
+Initial release
+
+[Unreleased]: https://github.com/ubolonton/emacs-tree-sitter/compare/0.2.0...HEAD
+[0.2.0]: https://github.com/ubolonton/emacs-tree-sitter/compare/0.1.0...0.2.0
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,7 +11,7 @@ name = "tree_sitter_dyn"
 crate-type = ["cdylib"]
 
 [dependencies]
-emacs = "0.12.1"
+emacs = "0.12.3"
 libloading = "0.5.1"
 tree-sitter = "0.6.0"
 

diff --git a/README.md b/README.md
@@ -74,11 +74,17 @@ If you want to hack on `emacs-tree-sitter` itself, see the section [Setup for De
 ### Types
 
 - `language`, `parser`, `tree`, `node`, `cursor`, `query`: corresponding tree-sitter types, embedded in `user-ptr` objects.
-- `point`: a vector in the form of `[row column]`, where `row` and `column` are zero-based. This is different from Emacs's concept of "point". Also note that `column` counts bytes, unlike the current built-in function `current-column`.
-- `range`: a vector in the form of `[start-point end-point]`.
+- `point`: a pair of `(LINE-NUMBER . BYTE-COLUMN)`.
+  + `LINE-NUMBER` is the absolute line number returned by `line-number-at-pos`, counting from 1.
+  + `BYTE-COLUMN` counts from 0, like `current-column`. However, unlike that function, it counts bytes, instead of displayed glyphs.
+- `range`: a vector in the form of `[START-BYTEPOS END-BYTEPOS START-POINT END-POINT]`.
 
 These types are understood only by this package. They are not recognized by `type-of`, but have corresponding type-checking predicates, which are useful for debugging: `ts-language-p`, `ts-tree-p`, `ts-node-p`...
 
+For consistency with Emacs's conventions, this binding differs from tree-sitter's C/Rust APIs in the following ways:
+- It uses 1-based byte position, not 0-based byte offset.
+- It uses 1-based line number, not 0-based row.
+
 ### Functions
 
 - Language:

diff --git a/src/cursor.rs b/src/cursor.rs
@@ -53,12 +53,12 @@ fn current_field_name(cursor: Value) -> Result<Value> {
 }
 
 macro_rules! defun_cursor_walks {
-    ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident: $itype:ty ),* ) )? -> $type:ty)*) => {
+    ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident $($into:ident)? : $itype:ty ),* ) )? -> $type:ty)*) => {
         $(
             $(#[$meta])*
             #[defun$((name = $lisp_name))?]
             fn $name(cursor: &mut RCursor, $( $( $param: $itype ),* )? ) -> Result<$type> {
-                Ok(cursor.borrow_mut().$name( $( $( $param ),* )? ))
+                Ok(cursor.borrow_mut().$name( $( $( $param $(.$into())? ),* )? ))
             }
         )*
     };
@@ -77,9 +77,9 @@ defun_cursor_walks! {
     /// Return t if CURSOR successfully moved, nil if there was no next sibling node.
     fn goto_next_sibling -> bool
 
-    /// Move CURSOR to the first child that extends beyond the given byte offset.
+    /// Move CURSOR to the first child that extends beyond the given BYTEPOS.
     /// Return the index of the child node if one was found, nil otherwise.
-    "goto-first-child-for-byte" fn goto_first_child_for_byte(index: usize) -> Option<usize>
+    "goto-first-child-for-byte" fn goto_first_child_for_byte(bytepos into: BytePos) -> Option<usize>
 }
 
 /// Re-initialize CURSOR to start at a different NODE.

diff --git a/src/node.rs b/src/node.rs
@@ -69,19 +69,19 @@ defun_node_props! {
 
     // Position ------------------------------------------------------------------------------------
 
-    /// Return NODE's start byte.
-    "node-start-byte" fn start_byte -> usize
+    /// Return NODE's start byte position.
+    "node-start-byte" fn start_byte -> BytePos; into
 
-    /// Return NODE's start point, as a `[ROW COLUMN]' vector.
+    /// Return NODE's start point, in the form of (LINE-NUMBER . BYTE-COLUMN).
     "node-start-point" fn start_position -> Point; into
 
-    /// Return NODE's end byte.
-    "node-end-byte" fn end_byte -> usize
+    /// Return NODE's end byte position.
+    "node-end-byte" fn end_byte -> BytePos; into
 
-    /// Return NODE's end point, as a `[ROW COLUMN]' vector.
+    /// Return NODE's end point, in the form of (LINE-NUMBER . BYTE-COLUMN).
     "node-end-point" fn end_position -> Point; into
 
-    /// Return NODE's `[START-BYTE END-BYTE START-POINT END-POINT]'.
+    /// Return a vector of NODE's [START-BYTEPOS END-BYTEPOS START-POINT END-POINT].
     "node-range" fn range -> Range; into
 
     // Counting child nodes ------------------------------------------------------------------------
@@ -116,10 +116,10 @@ defun_node_navs! {
 
     // Child ---------------------------------------------------------------------------------------
 
-    /// Return NODE's child at the given zero-based index.
+    /// Return NODE's child at the given 0-based index.
     "get-nth-child" fn child(i: usize)
 
-    /// Return NODE's named child at the given zero-based index.
+    /// Return NODE's named child at the given 0-based index.
     "get-nth-named-child" fn named_child(i: usize)
 
     /// Return NODE's child with the given FIELD-NAME.
@@ -144,16 +144,18 @@ defun_node_navs! {
 
     // Descendant ----------------------------------------------------------------------------------
 
-    /// Return the smallest node within NODE that spans the given range of bytes.
-    "get-descendant-for-byte-range" fn descendant_for_byte_range(start: usize, end: usize)
+    /// Return the smallest node within NODE that spans the given range of byte
+    /// positions.
+    "get-descendant-for-byte-range" fn descendant_for_byte_range(start into: BytePos, end into: BytePos)
 
-    /// Return the smallest node within NODE that spans the given range of points.
+    /// Return the smallest node within NODE that spans the given point range.
     "get-descendant-for-point-range" fn descendant_for_point_range(start into: Point, end into: Point)
 
-    /// Return the smallest named node within NODE that spans the given range of bytes.
-    "get-named-descendant-for-byte-range" fn named_descendant_for_byte_range(start: usize, end: usize)
+    /// Return the smallest named node within NODE that spans the given range of byte
+    /// positions.
+    "get-named-descendant-for-byte-range" fn named_descendant_for_byte_range(start into: BytePos, end into: BytePos)
 
-    /// Return the smallest named node within NODE that spans the given range of points.
+    /// Return the smallest named node within NODE that spans the given point range.
     "get-named-descendant-for-point-range" fn named_descendant_for_point_range(start into: Point, end into: Point)
 }
 
@@ -164,26 +166,33 @@ defun_node_props! {
 
 /// Edit NODE to keep it in sync with source code that has been edited.
 ///
+/// You must describe the edit both in terms of byte positions and in terms of
+/// (LINE-NUMBER . BYTE-COLUMN) coordinates.
+///
+/// LINE-NUMBER should be the number returned by `line-number-at-pos', which counts
+/// from 1.
+///
+/// BYTE-COLUMN should count from 0, like Emacs's `current-column'. However, unlike
+/// that function, it should count bytes, instead of displayed glyphs.
+///
 /// This function is only rarely needed. When you edit a syntax tree, all of the
 /// nodes that you retrieve from the tree afterward will already reflect the edit.
 /// You only need to use this function when you have a node that you want to keep
 /// and continue to use after an edit.
-///
-/// Note that indexing must be zero-based.
 #[defun]
 fn edit_node(
     node: &mut RNode,
-    start_byte: usize,
-    old_end_byte: usize,
-    new_end_byte: usize,
+    start_bytepos: BytePos,
+    old_end_bytepos: BytePos,
+    new_end_bytepos: BytePos,
     start_point: Point,
     old_end_point: Point,
     new_end_point: Point,
 ) -> Result<()> {
     let edit = InputEdit {
-        start_byte,
-        old_end_byte,
-        new_end_byte,
+        start_byte: start_bytepos.into(),
+        old_end_byte: old_end_bytepos.into(),
+        new_end_byte: new_end_bytepos.into(),
         start_position: start_point.into(),
         old_end_position: old_end_point.into(),
         new_end_position: new_end_point.into(),

diff --git a/src/parser.rs b/src/parser.rs
@@ -1,7 +1,7 @@
 use emacs::{defun, Result, Value, Vector};
 use emacs::failure;
 
-use tree_sitter::{Parser, Point, Tree};
+use tree_sitter::{Parser, Tree};
 
 use crate::types::*;
 
@@ -34,18 +34,22 @@ fn language(parser: &Parser) -> Result<Option<Language>> {
 
 /// Parse source code generated by INPUT-FUNCTION with PARSER, returning a tree.
 ///
-/// INPUT-FUNCTION should take 3 parameters: (BYTE-OFFSET ROW COLUMN), and return a
-/// fragment of the source code, starting from the position identified by either
-/// BYTE-OFFSET or [ROW COLUMN].
+/// INPUT-FUNCTION should take 3 parameters: (BYTEPOS LINE-NUMBER BYTE-COLUMN), and
+/// return a fragment of the source code, starting from the position identified by
+/// either BYTEPOS or (LINE-NUMBER . BYTE-COLUMN).
+///
+/// BYTEPOS is Emacs's 1-based byte position.
+///
+/// LINE-NUMBER is the number returned by `line-number-at-pos', which counts from 1.
+///
+/// BYTE-COLUMN counts from 0, like Emacs's `current-column'. However, unlike that
+/// function, it counts bytes, instead of displayed glyphs.
 ///
 /// If you have already parsed an earlier version of this document, and it has since
 /// been edited, pass the previously parsed OLD-TREE so that its unchanged parts can
 /// be reused. This will save time and memory. For this to work correctly, you must
 /// have already edited it using `ts-edit-tree' function in a way that exactly
 /// matches the source code changes.
-///
-/// Note that indexing is assumed to be zero-based, while Emacs normally uses
-/// one-based indexing for accessing buffer content.
 #[defun]
 fn parse(parser: &mut Parser, input_function: Value, old_tree: Option<&Shared<Tree>>) -> Result<Shared<Tree>> {
     let old_tree = match old_tree {
@@ -60,8 +64,10 @@ fn parse(parser: &mut Parser, input_function: Value, old_tree: Option<&Shared<Tr
     // unwinding across FFI boundary during a panic is UB (future Rust versions will abort).
     // See https://github.com/rust-lang/rust/issues/52652.
     let mut input_error = None;
-    let input = &mut |byte: usize, position: Point| -> String {
-        input_function.call((byte, position.row, position.column))
+    let input = &mut |byte: usize, point: tree_sitter::Point| -> String {
+        let bytepos: BytePos = byte.into();
+        let point: Point = point.into();
+        input_function.call((bytepos, point.line_number(), point.byte_column()))
             .and_then(|v| v.into_rust())
             .unwrap_or_else(|e| {
                 input_error = Some(e);

diff --git a/src/query.rs b/src/query.rs
@@ -28,20 +28,20 @@ fn _make_query(language: Language, source: String) -> Result<Query> {
 }
 
 macro_rules! defun_query_methods {
-    ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident : $type:ty ),* ) )? -> $rtype:ty )*) => {
+    ($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident : $type:ty ),* ) )? -> $rtype:ty $(; $into:ident)? )*) => {
         $(
             #[defun$((name = $lisp_name))?]
             $(#[$meta])*
             fn $name(query: &Query, $( $( $param : $type ),* )? ) -> Result<$rtype> {
-                Ok(query.$name( $( $( $param ),* )? ))
+                Ok(query.$name( $( $( $param ),* )? )$(.$into())?)
             }
         )*
     };
 }
 
 defun_query_methods! {
-    /// Return the byte offset where the NTH pattern starts in QUERY's source.
-    "query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> usize
+    /// Return the byte position where the NTH pattern starts in QUERY's source.
+    "query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> BytePos; into
 
     /// Return the number of patterns in QUERY.
     "query-count-patterns" fn pattern_count -> usize
@@ -120,13 +120,13 @@ fn _query_cursor_matches<'e>(
         for (ci, c) in m.captures.iter().enumerate() {
             let captured_node = node.map(|_| c.node);
             let capture = if index_only.is_some() {
-                env.vector((c.index, captured_node))?
+                env.cons(c.index, captured_node)?
             } else {
-                env.vector((&capture_names[c.index as usize], captured_node))?
+                env.cons(&capture_names[c.index as usize], captured_node)?
             };
             captures.set(ci, capture)?;
         }
-        let _match = env.vector((m.pattern_index, captures))?;
+        let _match = env.cons(m.pattern_index, captures)?;
         vec.push(_match);
     }
     vec_to_vector(env, vec)
@@ -156,23 +156,26 @@ fn _query_cursor_captures<'e>(
         let c = m.captures[capture_index];
         let captured_node = node.map(|_| c.node);
         let capture = if index_only.is_some() {
-            env.vector((c.index, captured_node))?
+            env.cons(c.index, captured_node)?
         } else {
-            env.vector((&capture_names[c.index as usize], captured_node))?
+            env.cons(&capture_names[c.index as usize], captured_node)?
         };
         vec.push(capture);
     }
     vec_to_vector(env, vec)
 }
 
-/// Limit CURSOR's query executions to the byte range [BEG END].
+/// Limit CURSOR's query executions to the range of byte positions, from BEG to END.
 #[defun]
-fn set_byte_range(cursor: &mut QueryCursor, beg: usize, end: usize) -> Result<()> {
-    cursor.set_byte_range(beg, end);
+fn set_byte_range(cursor: &mut QueryCursor, beg: BytePos, end: BytePos) -> Result<()> {
+    cursor.set_byte_range(beg.into(), end.into());
     Ok(())
 }
 
-/// Limit CURSOR's query executions to the point range [BEG END].
+/// Limit CURSOR's query executions to the point range, from BEG to END.
+///
+/// A "point" in this context is a (LINE-NUMBER . BYTE-COLUMN) pair. See `ts-parse'
+/// for a more detailed explanation.
 #[defun]
 fn set_point_range(cursor: &mut QueryCursor, beg: Point, end: Point) -> Result<()> {
     cursor.set_point_range(beg.into(), end.into());

diff --git a/src/tree.rs b/src/tree.rs
@@ -24,22 +24,28 @@ fn root_node(tree: Borrowed<Tree>) -> Result<RNode> {
 
 /// Edit the syntax TREE to keep it in sync with source code that has been edited.
 ///
-/// You must describe the edit both in terms of byte offsets and in terms of
-/// `[ROW COLUMN]' coordinates, using zero-based indexing.
+/// You must describe the edit both in terms of byte positions and in terms of
+/// (LINE-NUMBER . BYTE-COLUMN) coordinates.
+///
+/// LINE-NUMBER should be the number returned by `line-number-at-pos', which counts
+/// from 1.
+///
+/// BYTE-COLUMN should count from 0, like Emacs's `current-column'. However, unlike
+/// that function, it should count bytes, instead of displayed glyphs.
 #[defun]
 fn edit_tree(
     tree: Borrowed<Tree>,
-    start_byte: usize,
-    old_end_byte: usize,
-    new_end_byte: usize,
+    start_bytepos: BytePos,
+    old_end_bytepos: BytePos,
+    new_end_bytepos: BytePos,
     start_point: Point,
     old_end_point: Point,
     new_end_point: Point,
 ) -> Result<()> {
     let edit = InputEdit {
-        start_byte,
-        old_end_byte,
-        new_end_byte,
+        start_byte: start_bytepos.into(),
+        old_end_byte: old_end_bytepos.into(),
+        new_end_byte: new_end_bytepos.into(),
         start_position: start_point.into(),
         old_end_position: old_end_point.into(),
         new_end_position: new_end_point.into(),
@@ -48,8 +54,6 @@ fn edit_tree(
     Ok(())
 }
 
-// TODO: walk_with_properties
-
 /// Compare an edited OLD-TREE to NEW-TREE, both representing the same document.
 ///
 /// This function returns a vector of ranges whose syntactic structure has changed.