From 902e5abaea73948384eadc0928b6666c2ec8cc24 Mon Sep 17 00:00:00 2001 From: Antony Blakey Date: Sat, 3 Feb 2024 21:27:51 +1100 Subject: [PATCH] Reduce cursor clone cost by sharing the ancestor path (#754) --- crates/codegen/parser/runtime/src/cursor.rs | 224 +++++++++--------- .../cargo/crate/src/generated/cursor.rs | 224 +++++++++--------- 2 files changed, 224 insertions(+), 224 deletions(-) diff --git a/crates/codegen/parser/runtime/src/cursor.rs b/crates/codegen/parser/runtime/src/cursor.rs index 25bdcb6c8b..429489e562 100644 --- a/crates/codegen/parser/runtime/src/cursor.rs +++ b/crates/codegen/parser/runtime/src/cursor.rs @@ -6,27 +6,22 @@ use crate::cst::{NamedNode, Node, RuleNode}; use crate::kinds::{FieldName, RuleKind, TokenKind}; use crate::text_index::{TextIndex, TextRange}; -/// A [`PathNode`] that points to a [`RuleNode`]. +/// A node in the ancestor path of a [`Cursor`]. #[derive(Clone, Debug, PartialEq, Eq)] -struct PathRuleNode { +struct PathAncestor { + parent: Option>, rule_node: Rc, child_number: usize, text_offset: TextIndex, } -impl PathRuleNode { - fn into_path_node(self) -> PathNode { - PathNode { - node: Node::Rule(self.rule_node), - child_number: self.child_number, - text_offset: self.text_offset, - } - } -} - -/// A pointer to a [`Node`] in a CST, used by the [`Cursor`] to implement the traversal. +/// A cursor that can traverse a CST. +/// +/// Nodes are visited in a DFS pre-order traversal. #[derive(Clone, Debug, PartialEq, Eq)] -struct PathNode { +pub struct Cursor { + /// The parent path of this cursor + parent: Option>, /// The node the cursor is currently pointing to. node: Node, /// The index of the current child node in the parent's children. @@ -34,40 +29,31 @@ struct PathNode { child_number: usize, /// Text offset that corresponds to the beginning of the currently pointed to node. text_offset: TextIndex, + /// Whether the cursor is completed, i.e. at the root node as a result of traversal (or when `complete`d). + /// If `true`, the cursor cannot be moved. + is_completed: bool, } -impl PathNode { - fn text_range(&self) -> TextRange { - let start = self.text_offset; - let end = start + self.node.text_len(); - start..end - } - - fn to_path_rule_node(&self) -> Option { +impl Cursor { + fn as_ancestor_node(&self) -> Option> { if let Node::Rule(rule_node) = &self.node { - Some(PathRuleNode { + Some(Rc::new(PathAncestor { + parent: self.parent.clone(), rule_node: rule_node.clone(), child_number: self.child_number, text_offset: self.text_offset, - }) + })) } else { None } } -} -/// A cursor that can traverse a CST. -/// -/// Nodes are visited in a DFS pre-order traversal. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Cursor { - /// The list of ancestor rule nodes that the `current` node is a part of. - path: Vec, - /// The node the cursor is currently pointing to. - current: PathNode, - /// Whether the cursor is completed, i.e. at the root node as a result of traversal (or when `complete`d). - /// If `true`, the cursor cannot be moved. - is_completed: bool, + fn set_from_ancestor_node(&mut self, ancestor: &Rc) { + self.parent = ancestor.parent.clone(); + self.node = Node::Rule(ancestor.rule_node.clone()); + self.child_number = ancestor.child_number; + self.text_offset = ancestor.text_offset; + } } impl Iterator for Cursor { @@ -88,12 +74,10 @@ impl Iterator for Cursor { impl Cursor { pub(crate) fn new(node: Node, text_offset: TextIndex) -> Self { Self { - path: vec![], - current: PathNode { - node, - child_number: 0, - text_offset, - }, + parent: None, + node, + child_number: 0, + text_offset, is_completed: false, } } @@ -106,8 +90,12 @@ impl Cursor { /// Completes the cursor, setting it to the root node. pub fn complete(&mut self) { - if let Some(root) = self.path.drain(..).next() { - self.current = root.into_path_node(); + if let Some(parent) = &self.parent.clone() { + let mut parent = parent; + while let Some(grandparent) = &parent.parent { + parent = grandparent; + } + self.set_from_ancestor_node(parent); } self.is_completed = true; } @@ -118,9 +106,11 @@ impl Cursor { #[must_use] pub fn spawn(&self) -> Self { Self { - path: vec![], - current: self.current.clone(), is_completed: false, + parent: None, + node: self.node.clone(), + child_number: 0, + text_offset: self.text_offset, } } @@ -131,12 +121,12 @@ impl Cursor { /// Returns the currently pointed to [`Node`]. pub fn node(&self) -> Node { - self.current.node.clone() + self.node.clone() } pub fn node_name(&self) -> Option { - self.path.last().and_then(|parent| { - let this = &parent.rule_node.children[self.current.child_number]; + self.parent.as_ref().and_then(|parent| { + let this = &parent.rule_node.children[self.child_number]; this.name }) @@ -144,22 +134,51 @@ impl Cursor { /// Returns the text offset that corresponds to the beginning of the currently pointed to node. pub fn text_offset(&self) -> TextIndex { - self.current.text_offset + self.text_offset } /// Returns the text range that corresponds to the currently pointed to node. pub fn text_range(&self) -> TextRange { - self.current.text_range() + let start = self.text_offset; + let end = start + self.node.text_len(); + start..end } /// Returns the depth of the current node in the CST, i.e. the number of ancestors. pub fn depth(&self) -> usize { - self.path.len() + let mut depth = 0; + if let Some(parent) = &self.parent { + let mut parent = parent; + depth += 1; + while let Some(grandparent) = &parent.parent { + depth += 1; + parent = grandparent; + } + } + + depth } - /// Returns an iterator over the current node's ancestors, starting from the cursor root node. - pub fn ancestors(&self) -> impl Iterator> { - self.path.iter().map(|elem| &elem.rule_node) + /// Returns an iterator over the current node's ancestors, starting from the parent of the current node. + pub fn ancestors(&self) -> impl Iterator> { + struct Iter { + a: Option>, + } + impl Iterator for Iter { + type Item = Rc; + + fn next(&mut self) -> Option { + if let Some(a) = self.a.take() { + self.a = a.parent.clone(); + Some(a.rule_node.clone()) + } else { + None + } + } + } + Iter { + a: self.parent.clone(), + } } /// Attempts to go to current node's next one, according to the DFS pre-order traversal. @@ -213,8 +232,8 @@ impl Cursor { /// /// Returns `false` if the cursor is finished and at the root. pub fn go_to_parent(&mut self) -> bool { - if let Some(parent) = self.path.pop() { - self.current = parent.into_path_node(); + if let Some(parent) = &self.parent.clone() { + self.set_from_ancestor_node(parent); true } else { @@ -233,15 +252,11 @@ impl Cursor { } // If the current cursor is a node and it has children, go to first children - if let Some(parent) = self.current.to_path_rule_node() { - if let Some(child) = parent.rule_node.children.first().cloned() { - self.current = PathNode { - node: child.node, - text_offset: parent.text_offset, - child_number: 0, - }; - - self.path.push(parent); + if let Some(new_parent) = self.as_ancestor_node() { + if let Some(new_child) = new_parent.rule_node.children.first().cloned() { + self.parent = Some(new_parent); + self.node = new_child.node; + self.child_number = 0; return true; } @@ -251,26 +266,20 @@ impl Cursor { } /// Attempts to go to current node's last child. - + /// /// Returns `false` if the cursor is finished or there's no child to go to. pub fn go_to_last_child(&mut self) -> bool { if self.is_completed { return false; } - if let Some(parent) = self.current.to_path_rule_node() { - let child_number = parent.rule_node.children.len() - 1; - if let Some(child) = parent.rule_node.children.get(child_number).cloned() { + if let Some(new_parent) = self.as_ancestor_node() { + if let Some(new_child) = new_parent.rule_node.children.last().cloned() { + self.child_number = new_parent.rule_node.children.len() - 1; // This is cheaper than summing up the length of the children - let text_offset = parent.text_offset + parent.rule_node.text_len - child.text_len(); - - self.path.push(parent); - - self.current = PathNode { - node: child.node, - text_offset, - child_number, - }; + self.text_offset += new_parent.rule_node.text_len - new_child.text_len(); + self.node = new_child.node; + self.parent = Some(new_parent); return true; } @@ -287,22 +296,17 @@ impl Cursor { return false; } - if let Some(parent) = self.current.to_path_rule_node() { - if let Some(child) = parent.rule_node.children.get(child_number).cloned() { + if let Some(new_parent) = self.as_ancestor_node() { + if let Some(new_child) = new_parent.rule_node.children.get(child_number).cloned() { + self.node = new_child.node; + self.child_number = child_number; // Sum up the length of the children before this child // TODO: it might sometimes be quicker to start from the end (like `go_to_last_child`) - let text_offset = parent.text_offset - + parent.rule_node.children[..child_number] - .iter() - .map(|node| node.text_len()) - .sum(); - - self.path.push(parent); - self.current = PathNode { - node: child.node, - text_offset, - child_number, - }; + self.text_offset += new_parent.rule_node.children[..child_number] + .iter() + .map(|node| node.text_len()) + .sum(); + self.parent = Some(new_parent); return true; } @@ -319,14 +323,12 @@ impl Cursor { return false; } - if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.current.child_number + 1; - if let Some(new_child) = parent_path_element.rule_node.children.get(new_child_number) { - self.current = PathNode { - node: new_child.node.clone(), - text_offset: self.current.text_offset + self.current.node.text_len(), - child_number: new_child_number, - }; + if let Some(parent) = &self.parent { + let new_child_number = self.child_number + 1; + if let Some(new_child) = parent.rule_node.children.get(new_child_number) { + self.text_offset += self.node.text_len(); + self.node = new_child.node.clone(); + self.child_number = new_child_number; return true; } @@ -343,16 +345,14 @@ impl Cursor { return false; } - if self.current.child_number > 0 { - if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.current.child_number - 1; - let new_child = parent_path_element.rule_node.children[new_child_number].clone(); + if let Some(parent) = &self.parent { + if self.child_number > 0 { + let new_child_number = self.child_number - 1; + let new_child = &parent.rule_node.children[new_child_number]; + self.text_offset -= new_child.node.text_len(); + self.node = new_child.node.clone(); + self.child_number = new_child_number; - self.current = PathNode { - node: new_child.node, - text_offset: self.current.text_offset - self.current.node.text_len(), - child_number: new_child_number, - }; return true; } } @@ -404,7 +404,7 @@ impl Cursor { fn go_to_next_matching(&mut self, pred: impl Fn(&Node) -> bool) -> bool { while self.go_to_next() { - if pred(&self.current.node) { + if pred(&self.node) { return true; } } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs b/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs index aa49275c2f..dba7bb2924 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs @@ -8,27 +8,22 @@ use crate::cst::{NamedNode, Node, RuleNode}; use crate::kinds::{FieldName, RuleKind, TokenKind}; use crate::text_index::{TextIndex, TextRange}; -/// A [`PathNode`] that points to a [`RuleNode`]. +/// A node in the ancestor path of a [`Cursor`]. #[derive(Clone, Debug, PartialEq, Eq)] -struct PathRuleNode { +struct PathAncestor { + parent: Option>, rule_node: Rc, child_number: usize, text_offset: TextIndex, } -impl PathRuleNode { - fn into_path_node(self) -> PathNode { - PathNode { - node: Node::Rule(self.rule_node), - child_number: self.child_number, - text_offset: self.text_offset, - } - } -} - -/// A pointer to a [`Node`] in a CST, used by the [`Cursor`] to implement the traversal. +/// A cursor that can traverse a CST. +/// +/// Nodes are visited in a DFS pre-order traversal. #[derive(Clone, Debug, PartialEq, Eq)] -struct PathNode { +pub struct Cursor { + /// The parent path of this cursor + parent: Option>, /// The node the cursor is currently pointing to. node: Node, /// The index of the current child node in the parent's children. @@ -36,40 +31,31 @@ struct PathNode { child_number: usize, /// Text offset that corresponds to the beginning of the currently pointed to node. text_offset: TextIndex, + /// Whether the cursor is completed, i.e. at the root node as a result of traversal (or when `complete`d). + /// If `true`, the cursor cannot be moved. + is_completed: bool, } -impl PathNode { - fn text_range(&self) -> TextRange { - let start = self.text_offset; - let end = start + self.node.text_len(); - start..end - } - - fn to_path_rule_node(&self) -> Option { +impl Cursor { + fn as_ancestor_node(&self) -> Option> { if let Node::Rule(rule_node) = &self.node { - Some(PathRuleNode { + Some(Rc::new(PathAncestor { + parent: self.parent.clone(), rule_node: rule_node.clone(), child_number: self.child_number, text_offset: self.text_offset, - }) + })) } else { None } } -} -/// A cursor that can traverse a CST. -/// -/// Nodes are visited in a DFS pre-order traversal. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Cursor { - /// The list of ancestor rule nodes that the `current` node is a part of. - path: Vec, - /// The node the cursor is currently pointing to. - current: PathNode, - /// Whether the cursor is completed, i.e. at the root node as a result of traversal (or when `complete`d). - /// If `true`, the cursor cannot be moved. - is_completed: bool, + fn set_from_ancestor_node(&mut self, ancestor: &Rc) { + self.parent = ancestor.parent.clone(); + self.node = Node::Rule(ancestor.rule_node.clone()); + self.child_number = ancestor.child_number; + self.text_offset = ancestor.text_offset; + } } impl Iterator for Cursor { @@ -90,12 +76,10 @@ impl Iterator for Cursor { impl Cursor { pub(crate) fn new(node: Node, text_offset: TextIndex) -> Self { Self { - path: vec![], - current: PathNode { - node, - child_number: 0, - text_offset, - }, + parent: None, + node, + child_number: 0, + text_offset, is_completed: false, } } @@ -108,8 +92,12 @@ impl Cursor { /// Completes the cursor, setting it to the root node. pub fn complete(&mut self) { - if let Some(root) = self.path.drain(..).next() { - self.current = root.into_path_node(); + if let Some(parent) = &self.parent.clone() { + let mut parent = parent; + while let Some(grandparent) = &parent.parent { + parent = grandparent; + } + self.set_from_ancestor_node(parent); } self.is_completed = true; } @@ -120,9 +108,11 @@ impl Cursor { #[must_use] pub fn spawn(&self) -> Self { Self { - path: vec![], - current: self.current.clone(), is_completed: false, + parent: None, + node: self.node.clone(), + child_number: 0, + text_offset: self.text_offset, } } @@ -133,12 +123,12 @@ impl Cursor { /// Returns the currently pointed to [`Node`]. pub fn node(&self) -> Node { - self.current.node.clone() + self.node.clone() } pub fn node_name(&self) -> Option { - self.path.last().and_then(|parent| { - let this = &parent.rule_node.children[self.current.child_number]; + self.parent.as_ref().and_then(|parent| { + let this = &parent.rule_node.children[self.child_number]; this.name }) @@ -146,22 +136,51 @@ impl Cursor { /// Returns the text offset that corresponds to the beginning of the currently pointed to node. pub fn text_offset(&self) -> TextIndex { - self.current.text_offset + self.text_offset } /// Returns the text range that corresponds to the currently pointed to node. pub fn text_range(&self) -> TextRange { - self.current.text_range() + let start = self.text_offset; + let end = start + self.node.text_len(); + start..end } /// Returns the depth of the current node in the CST, i.e. the number of ancestors. pub fn depth(&self) -> usize { - self.path.len() + let mut depth = 0; + if let Some(parent) = &self.parent { + let mut parent = parent; + depth += 1; + while let Some(grandparent) = &parent.parent { + depth += 1; + parent = grandparent; + } + } + + depth } - /// Returns an iterator over the current node's ancestors, starting from the cursor root node. - pub fn ancestors(&self) -> impl Iterator> { - self.path.iter().map(|elem| &elem.rule_node) + /// Returns an iterator over the current node's ancestors, starting from the parent of the current node. + pub fn ancestors(&self) -> impl Iterator> { + struct Iter { + a: Option>, + } + impl Iterator for Iter { + type Item = Rc; + + fn next(&mut self) -> Option { + if let Some(a) = self.a.take() { + self.a = a.parent.clone(); + Some(a.rule_node.clone()) + } else { + None + } + } + } + Iter { + a: self.parent.clone(), + } } /// Attempts to go to current node's next one, according to the DFS pre-order traversal. @@ -215,8 +234,8 @@ impl Cursor { /// /// Returns `false` if the cursor is finished and at the root. pub fn go_to_parent(&mut self) -> bool { - if let Some(parent) = self.path.pop() { - self.current = parent.into_path_node(); + if let Some(parent) = &self.parent.clone() { + self.set_from_ancestor_node(parent); true } else { @@ -235,15 +254,11 @@ impl Cursor { } // If the current cursor is a node and it has children, go to first children - if let Some(parent) = self.current.to_path_rule_node() { - if let Some(child) = parent.rule_node.children.first().cloned() { - self.current = PathNode { - node: child.node, - text_offset: parent.text_offset, - child_number: 0, - }; - - self.path.push(parent); + if let Some(new_parent) = self.as_ancestor_node() { + if let Some(new_child) = new_parent.rule_node.children.first().cloned() { + self.parent = Some(new_parent); + self.node = new_child.node; + self.child_number = 0; return true; } @@ -253,26 +268,20 @@ impl Cursor { } /// Attempts to go to current node's last child. - + /// /// Returns `false` if the cursor is finished or there's no child to go to. pub fn go_to_last_child(&mut self) -> bool { if self.is_completed { return false; } - if let Some(parent) = self.current.to_path_rule_node() { - let child_number = parent.rule_node.children.len() - 1; - if let Some(child) = parent.rule_node.children.get(child_number).cloned() { + if let Some(new_parent) = self.as_ancestor_node() { + if let Some(new_child) = new_parent.rule_node.children.last().cloned() { + self.child_number = new_parent.rule_node.children.len() - 1; // This is cheaper than summing up the length of the children - let text_offset = parent.text_offset + parent.rule_node.text_len - child.text_len(); - - self.path.push(parent); - - self.current = PathNode { - node: child.node, - text_offset, - child_number, - }; + self.text_offset += new_parent.rule_node.text_len - new_child.text_len(); + self.node = new_child.node; + self.parent = Some(new_parent); return true; } @@ -289,22 +298,17 @@ impl Cursor { return false; } - if let Some(parent) = self.current.to_path_rule_node() { - if let Some(child) = parent.rule_node.children.get(child_number).cloned() { + if let Some(new_parent) = self.as_ancestor_node() { + if let Some(new_child) = new_parent.rule_node.children.get(child_number).cloned() { + self.node = new_child.node; + self.child_number = child_number; // Sum up the length of the children before this child // TODO: it might sometimes be quicker to start from the end (like `go_to_last_child`) - let text_offset = parent.text_offset - + parent.rule_node.children[..child_number] - .iter() - .map(|node| node.text_len()) - .sum(); - - self.path.push(parent); - self.current = PathNode { - node: child.node, - text_offset, - child_number, - }; + self.text_offset += new_parent.rule_node.children[..child_number] + .iter() + .map(|node| node.text_len()) + .sum(); + self.parent = Some(new_parent); return true; } @@ -321,14 +325,12 @@ impl Cursor { return false; } - if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.current.child_number + 1; - if let Some(new_child) = parent_path_element.rule_node.children.get(new_child_number) { - self.current = PathNode { - node: new_child.node.clone(), - text_offset: self.current.text_offset + self.current.node.text_len(), - child_number: new_child_number, - }; + if let Some(parent) = &self.parent { + let new_child_number = self.child_number + 1; + if let Some(new_child) = parent.rule_node.children.get(new_child_number) { + self.text_offset += self.node.text_len(); + self.node = new_child.node.clone(); + self.child_number = new_child_number; return true; } @@ -345,16 +347,14 @@ impl Cursor { return false; } - if self.current.child_number > 0 { - if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.current.child_number - 1; - let new_child = parent_path_element.rule_node.children[new_child_number].clone(); + if let Some(parent) = &self.parent { + if self.child_number > 0 { + let new_child_number = self.child_number - 1; + let new_child = &parent.rule_node.children[new_child_number]; + self.text_offset -= new_child.node.text_len(); + self.node = new_child.node.clone(); + self.child_number = new_child_number; - self.current = PathNode { - node: new_child.node, - text_offset: self.current.text_offset - self.current.node.text_len(), - child_number: new_child_number, - }; return true; } } @@ -406,7 +406,7 @@ impl Cursor { fn go_to_next_matching(&mut self, pred: impl Fn(&Node) -> bool) -> bool { while self.go_to_next() { - if pred(&self.current.node) { + if pred(&self.node) { return true; } }