Skip to content

Commit

Permalink
Add Node::unparse and tweak the cursor creation API (#666)
Browse files Browse the repository at this point in the history
Closes #583
Ref #628 for the modified cursor/offset API

Changes are best reviewed commit-by-commit.
  • Loading branch information
Xanewok authored Nov 22, 2023
1 parent 54af80d commit 0434b68
Show file tree
Hide file tree
Showing 28 changed files with 249 additions and 146 deletions.
5 changes: 5 additions & 0 deletions .changeset/strange-hats-itch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Add `Node::unparse()` that allows reconstructing the source code from the CST node
1 change: 1 addition & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"structs",
"tera",
"ufixed",
"unparse",
"usize"
]
}
86 changes: 53 additions & 33 deletions crates/codegen/parser/runtime/src/cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,63 +54,83 @@ impl Node {
}
}

pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
/// Creates a [`Cursor`] that starts at the current node as the root and a given initial `text_offset`.
pub fn cursor_with_offset(&self, text_offset: TextIndex) -> Cursor {
Cursor::new(self.clone(), text_offset)
}

/// Reconstructs the original source code from the parse tree.
pub fn unparse(self) -> String {
match self {
Self::Rule(rule) => rule.unparse(),
Self::Token(token) => token.text.clone(),
}
}

pub fn as_rule(&self) -> Option<&Rc<RuleNode>> {
match self {
Self::Rule(node) => Some(node),
_ => None,
}
}

pub fn into_rule(self) -> Option<Rc<RuleNode>> {
match self {
Self::Rule(node) => Some(node),
_ => None,
}
}

pub fn as_token(&self) -> Option<&Rc<TokenNode>> {
match self {
Self::Token(node) => Some(node),
_ => None,
}
}

pub fn as_token_with_kind(&self, kinds: &[TokenKind]) -> Option<&Rc<TokenNode>> {
if let Node::Token(token_node) = self {
if kinds.contains(&token_node.kind) {
return Some(token_node);
}
pub fn into_token(self) -> Option<Rc<TokenNode>> {
match self {
Self::Token(node) => Some(node),
_ => None,
}
return None;
}

pub fn as_token_matching<F: Fn(&Rc<TokenNode>) -> bool>(
&self,
predicate: F,
) -> Option<&Rc<TokenNode>> {
if let Node::Token(token_node) = self {
if predicate(&token_node) {
return Some(token_node);
}
}
return None;
pub fn as_token_with_kind(&self, kinds: &[TokenKind]) -> Option<&Rc<TokenNode>> {
self.as_token().filter(|token| kinds.contains(&token.kind))
}

pub fn as_rule_with_kind(&self, kinds: &[RuleKind]) -> Option<&Rc<RuleNode>> {
if let Node::Rule(rule_node) = self {
if kinds.contains(&rule_node.kind) {
return Some(rule_node);
}
}
return None;
self.as_rule().filter(|rule| kinds.contains(&rule.kind))
}
}

pub fn as_rule_matching<F: Fn(&Rc<RuleNode>) -> bool>(
&self,
predicate: F,
) -> Option<&Rc<RuleNode>> {
if let Node::Rule(rule_node) = self {
if predicate(&rule_node) {
return Some(rule_node);
}
}
return None;
impl From<Rc<RuleNode>> for Node {
fn from(node: Rc<RuleNode>) -> Self {
Self::Rule(node)
}
}

impl From<Rc<TokenNode>> for Node {
fn from(node: Rc<TokenNode>) -> Self {
Self::Token(node)
}
}

impl RuleNode {
/// Creates a [`Cursor`] that starts at the current node as the root and a given initial `text_offset`.
pub fn cursor_with_offset(self: Rc<Self>, text_offset: TextIndex) -> Cursor {
Cursor::new(Node::Rule(self), text_offset)
}

/// Reconstructs the original source code from the parse tree.
pub fn unparse(self: Rc<Self>) -> String {
let acc = String::with_capacity(self.text_len.utf8);

self.cursor_with_offset(TextIndex::ZERO)
.filter_map(Node::into_token)
.fold(acc, |mut acc, token| {
acc.push_str(&token.text);
acc
})
}
}
6 changes: 3 additions & 3 deletions crates/codegen/parser/runtime/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ impl Cursor {
None
}

/// In contract to `Iterator::find_*`, this does not consume the first item when found.
/// In contrast to `Iterator::find_*`, this does not consume the first item when found.
fn find_noconsume<F: Fn(&Node) -> Option<R>, R>(&mut self, predicate: F) -> Option<R> {
while !self.is_completed {
match predicate(&self.current.node) {
Expand Down Expand Up @@ -398,7 +398,7 @@ impl Cursor {
&mut self,
predicate: F,
) -> Option<Rc<TokenNode>> {
self.find_noconsume(|node| node.as_token_matching(&predicate).cloned())
self.find_noconsume(|node| node.as_token().filter(|node| predicate(node)).cloned())
}

/// Finds the first rule node with either of the given kinds.
Expand All @@ -415,6 +415,6 @@ impl Cursor {
&mut self,
predicate: F,
) -> Option<Rc<RuleNode>> {
self.find_noconsume(|node| node.as_rule_matching(&predicate).cloned())
self.find_noconsume(|node| node.as_rule().filter(|node| predicate(node)).cloned())
}
}
4 changes: 2 additions & 2 deletions crates/codegen/parser/runtime/src/napi/napi_cst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl RuleNode {
#[napi(ts_return_type = "cursor.Cursor")]
pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
RustNode::Rule(self.0.clone())
.create_cursor((&text_offset).into())
.cursor_with_offset((&text_offset).into())
.into()
}
}
Expand Down Expand Up @@ -88,7 +88,7 @@ impl TokenNode {
#[napi(ts_return_type = "cursor.Cursor")]
pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
RustNode::Token(self.0.clone())
.create_cursor((&text_offset).into())
.cursor_with_offset((&text_offset).into())
.into()
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/codegen/parser/runtime/src/parse_output.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{cst, cursor::Cursor, parse_error::ParseError};
use crate::{cst, cursor::Cursor, parse_error::ParseError, text_index::TextIndex};

#[derive(Debug, PartialEq)]
pub struct ParseOutput {
Expand All @@ -21,6 +21,6 @@ impl ParseOutput {

/// Creates a cursor that starts at the root of the parse tree.
pub fn create_tree_cursor(&self) -> Cursor {
return self.parse_tree.create_cursor(Default::default());
return self.parse_tree.cursor_with_offset(TextIndex::ZERO);
}
}
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/support/choice_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ pub fn total_not_skipped_span(result: &ParserResult) -> usize {

nodes
.iter()
.flat_map(|node| cst::Node::create_cursor(node, Default::default()))
.flat_map(|node| cst::Node::cursor_with_offset(node, TextIndex::ZERO))
.filter_map(|node| match node {
cst::Node::Token(token) if token.kind != TokenKind::SKIPPED => Some(token.text.len()),
_ => None,
Expand Down
7 changes: 5 additions & 2 deletions crates/codegen/parser/runtime/src/support/parser_function.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use std::rc::Rc;

use super::{
super::{cst, kinds::TokenKind, parse_error::ParseError, parse_output::ParseOutput},
super::{
cst, kinds::TokenKind, parse_error::ParseError, parse_output::ParseOutput,
text_index::TextIndex,
},
context::ParserContext,
parser_result::*,
};
Expand Down Expand Up @@ -95,7 +98,7 @@ where
debug_assert_eq!(
errors.len() > 0,
parse_tree
.create_cursor(Default::default())
.cursor_with_offset(TextIndex::ZERO)
.any(|x| x.as_token_with_kind(&[TokenKind::SKIPPED]).is_some())
);

Expand Down
4 changes: 2 additions & 2 deletions crates/codegen/parser/runtime/src/support/parser_result.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::super::{cst, kinds::*};
use super::super::{cst, kinds::*, text_index::TextIndex};

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ParserResult {
Expand Down Expand Up @@ -93,7 +93,7 @@ impl Match {
pub fn is_full_recursive(&self) -> bool {
self.nodes
.iter()
.flat_map(|node| cst::Node::create_cursor(node, Default::default()))
.flat_map(|node| cst::Node::cursor_with_offset(node, TextIndex::ZERO))
.all(|node| node.as_token_with_kind(&[TokenKind::SKIPPED]).is_none())
}
}
Expand Down
9 changes: 9 additions & 0 deletions crates/codegen/parser/runtime/src/text_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ pub struct TextIndex {
pub char: usize,
}

impl TextIndex {
/// Shorthand for `TextIndex { utf8: 0, utf16: 0, char: 0 }`.
pub const ZERO: TextIndex = TextIndex {
utf8: 0,
utf16: 0,
char: 0,
};
}

impl PartialOrd for TextIndex {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.utf8.partial_cmp(&other.utf8)
Expand Down
86 changes: 53 additions & 33 deletions crates/solidity/outputs/cargo/crate/src/generated/cst.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions crates/solidity/outputs/cargo/crate/src/generated/cursor.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0434b68

Please sign in to comment.