Skip to content

Commit

Permalink
feat: add search methods to Repository (#212)
Browse files Browse the repository at this point in the history
Adds `Repository` methods to find a given path in multiple trees.

`find_nodes_from_path`: Searches for an explicitly given path
`find_matching_nodes`: Searches using an arbitrary matching criterion
  • Loading branch information
aawsome authored Apr 30, 2024
1 parent 6495674 commit f3ad6e9
Show file tree
Hide file tree
Showing 18 changed files with 496 additions and 49 deletions.
1 change: 1 addition & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ xattr = "1"
[dev-dependencies]
expect-test = "1.4.1"
flate2 = "1.0.28"
globset = "0.4.14"
insta = { version = "1.36.1", features = ["redactions", "ron"] }
mockall = "0.12.1"
pretty_assertions = "1.4.0"
Expand Down
10 changes: 6 additions & 4 deletions crates/core/src/backend/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ use crate::error::NodeErrorKind;

use crate::id::Id;

#[derive(Default, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Constructor)]
#[derive(
Default, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Constructor, PartialOrd, Ord,
)]
/// A node within the tree hierarchy
pub struct Node {
/// Name of the node: filename or dirname.
Expand Down Expand Up @@ -63,7 +65,7 @@ pub struct Node {
}

#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[serde(tag = "type", rename_all = "lowercase")]
/// Types a [`Node`] can have with type-specific additional information
pub enum NodeType {
Expand Down Expand Up @@ -190,7 +192,7 @@ impl Default for NodeType {
Option => #[serde(default, skip_serializing_if = "Option::is_none")],
u64 => #[serde(default, skip_serializing_if = "is_default")],
)]
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq)]
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct Metadata {
/// Unix file mode
pub mode: Option<u32>,
Expand Down Expand Up @@ -247,7 +249,7 @@ where
}

/// Extended attribute of a [`Node`]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)]
pub struct ExtendedAttribute {
/// Name of the extended attribute
pub name: String,
Expand Down
192 changes: 191 additions & 1 deletion crates/core/src/blob/tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{
cmp::Ordering,
collections::{BTreeSet, BinaryHeap},
collections::{BTreeMap, BTreeSet, BinaryHeap},
ffi::{OsStr, OsString},
mem,
path::{Component, Path, PathBuf, Prefix},
Expand Down Expand Up @@ -158,6 +158,196 @@ impl Tree {

Ok(node)
}

/// Looks up one explicit `path` in each of the trees given by `ids`.
///
/// # Arguments
///
/// * `be` - The backend the trees are read from
/// * `index` - The index used to locate the trees
/// * `ids` - The ids of the root trees to search in
/// * `path` - The path to look up below each root tree
///
/// # Returns
///
/// A [`FindNode`] holding the distinct nodes found (in first-found order) and,
/// for every id in `ids` (in input order), the index of the node found for
/// that tree or `None` if the path does not exist in it.
///
/// A `path` without any named component (e.g. the empty path) does not name
/// an entry of a tree, so it yields `None` for every id.
///
/// # Errors
///
/// * If a path component cannot be converted by `comp_to_osstr`
/// * If a tree cannot be loaded from the backend
/// * If a non-final path component exists but has no subtree (is not a directory)
pub(crate) fn find_nodes_from_path(
    be: &impl DecryptReadBackend,
    index: &impl ReadGlobalIndex,
    ids: impl IntoIterator<Item = Id>,
    path: &Path,
) -> RusticResult<FindNode> {
    // helper function which is recursively called
    //
    // Resolves `path_comp[idx..]` below `tree_id`. `path_comp` must be
    // non-empty and `results_cache` must have one map per path component.
    fn find_node_from_component(
        be: &impl DecryptReadBackend,
        index: &impl ReadGlobalIndex,
        tree_id: Id,
        path_comp: &[OsString],
        results_cache: &mut [BTreeMap<Id, Option<usize>>],
        nodes: &mut BTreeMap<Node, usize>,
        idx: usize,
    ) -> RusticResult<Option<usize>> {
        // the same tree at the same depth always gives the same answer
        if let Some(result) = results_cache[idx].get(&tree_id) {
            return Ok(*result);
        }

        let tree = Tree::from_backend(be, index, tree_id)?;
        let result = if let Some(node) = tree
            .nodes
            .into_iter()
            .find(|node| node.name() == path_comp[idx])
        {
            if idx == path_comp.len() - 1 {
                // last component: register the node, reusing the index of an
                // identical node that was already found in another tree
                let new_idx = nodes.len();
                let node_idx = nodes.entry(node).or_insert(new_idx);
                Some(*node_idx)
            } else {
                // intermediate component: it must be a directory to descend into
                let id = node
                    .subtree
                    .ok_or_else(|| TreeErrorKind::NotADirectory(path_comp[idx].clone()))?;

                find_node_from_component(
                    be,
                    index,
                    id,
                    path_comp,
                    results_cache,
                    nodes,
                    idx + 1,
                )?
            }
        } else {
            None
        };
        _ = results_cache[idx].insert(tree_id, result);
        Ok(result)
    }

    let path_comp: Vec<_> = path
        .components()
        .filter_map(|p| comp_to_osstr(p).transpose())
        .collect::<RusticResult<_>>()?;

    // Without any component there is nothing to look up; the helper would
    // index out of bounds (`results_cache[0]`, `path_comp.len() - 1`).
    // Report "not found" for every given tree instead of panicking.
    if path_comp.is_empty() {
        return Ok(FindNode {
            nodes: Vec::new(),
            matches: ids.into_iter().map(|_| None).collect(),
        });
    }

    // caching all results, one cache per path depth
    let mut results_cache = vec![BTreeMap::new(); path_comp.len()];
    let mut nodes = BTreeMap::new();

    let matches: Vec<_> = ids
        .into_iter()
        .map(|id| {
            find_node_from_component(
                be,
                index,
                id,
                &path_comp,
                &mut results_cache,
                &mut nodes,
                0,
            )
        })
        .collect::<RusticResult<_>>()?;

    // sort nodes by index and return a Vec
    let mut nodes: Vec<_> = nodes.into_iter().collect();
    nodes.sort_unstable_by_key(|n| n.1);
    let nodes = nodes.into_iter().map(|n| n.0).collect();

    Ok(FindNode { nodes, matches })
}

/// Recursively walks each of the trees given by `ids` and collects every
/// node accepted by the `matches` predicate.
///
/// # Arguments
///
/// * `be` - The backend the trees are read from
/// * `index` - The index used to locate the trees
/// * `ids` - The ids of the root trees to search in
/// * `matches` - Predicate called with the node's path (relative to the root
///   tree) and the node itself; returns `true` for nodes to report
///
/// # Returns
///
/// A [`FindMatches`] holding the distinct paths and nodes found (each in
/// first-found order) and, for every id in `ids`, the list of
/// `(path index, node index)` pairs matched in that tree.
///
/// # Errors
///
/// * If a tree cannot be loaded from the backend
/// * If a directory node has no subtree
pub(crate) fn find_matching_nodes(
    be: &impl DecryptReadBackend,
    index: &impl ReadGlobalIndex,
    ids: impl IntoIterator<Item = Id>,
    matches: &impl Fn(&Path, &Node) -> bool,
) -> RusticResult<FindMatches> {
    // bookkeeping shared by all recursive calls
    #[derive(Default)]
    struct MatchInternalState {
        // results already computed for a (tree, path prefix) combination
        cache: BTreeMap<(Id, PathBuf), Vec<(usize, usize)>>,
        // distinct nodes, mapped to the order in which they were first seen
        nodes: BTreeMap<Node, usize>,
        // distinct paths, mapped to the order in which they were first seen
        paths: BTreeMap<PathBuf, usize>,
    }

    impl MatchInternalState {
        // Registers a match and returns its `(path index, node index)`;
        // already known paths/nodes keep their existing index.
        fn insert_result(&mut self, path: PathBuf, node: Node) -> (usize, usize) {
            let next_node_idx = self.nodes.len();
            let node_idx = *self.nodes.entry(node).or_insert(next_node_idx);
            let next_path_idx = self.paths.len();
            let path_idx = *self.paths.entry(path).or_insert(next_path_idx);
            (path_idx, node_idx)
        }
    }

    // Turns an `item -> index` map into a Vec ordered by index.
    fn into_index_order<T>(indexed: BTreeMap<T, usize>) -> Vec<T> {
        let mut entries: Vec<_> = indexed.into_iter().collect();
        entries.sort_unstable_by_key(|entry| entry.1);
        entries.into_iter().map(|entry| entry.0).collect()
    }

    // recursive worker: collects all matches below `tree_id`, which is located at `path`
    fn find_matching_nodes_recursive(
        be: &impl DecryptReadBackend,
        index: &impl ReadGlobalIndex,
        tree_id: Id,
        path: &Path,
        state: &mut MatchInternalState,
        matches: &impl Fn(&Path, &Node) -> bool,
    ) -> RusticResult<Vec<(usize, usize)>> {
        let cache_key = (tree_id, path.to_path_buf());
        if let Some(cached) = state.cache.get(&cache_key) {
            return Ok(cached.clone());
        }

        let mut found = Vec::new();
        let tree = Tree::from_backend(be, index, tree_id)?;
        for node in tree.nodes {
            let node_path = path.join(node.name());
            if node.is_dir() {
                let subtree_id = node
                    .subtree
                    .ok_or_else(|| TreeErrorKind::NotADirectory(node.name()))?;
                // matches inside a directory are listed before the directory itself
                found.extend(find_matching_nodes_recursive(
                    be,
                    index,
                    subtree_id,
                    &node_path,
                    state,
                    matches,
                )?);
            }
            if matches(&node_path, &node) {
                found.push(state.insert_result(node_path, node));
            }
        }

        _ = state.cache.insert(cache_key, found.clone());
        Ok(found)
    }

    let mut state = MatchInternalState::default();
    let root_path = PathBuf::new();

    // one result list per given tree id; stop at the first error
    let mut per_tree = Vec::new();
    for id in ids {
        per_tree.push(find_matching_nodes_recursive(
            be, index, id, &root_path, &mut state, matches,
        )?);
    }

    let MatchInternalState { nodes, paths, .. } = state;
    Ok(FindMatches {
        paths: into_index_order(paths),
        nodes: into_index_order(nodes),
        matches: per_tree,
    })
}
}

/// Results from `find_nodes_from_path`
#[derive(Debug, Serialize)]
pub struct FindNode {
/// Distinct nodes found for the given path, in the order in which they were first found
pub nodes: Vec<Node>,
/// One entry per searched tree id, in input order: `Some(i)` refers to `nodes[i]`,
/// `None` means the path was not found in that tree
pub matches: Vec<Option<usize>>,
}

/// Results from `find_matching_nodes`
#[derive(Debug, Serialize)]
pub struct FindMatches {
/// Distinct matching paths (relative to the searched root tree), in the order in which they were first found
pub paths: Vec<PathBuf>,
/// Distinct matching nodes, in the order in which they were first found
pub nodes: Vec<Node>,
/// One list per searched tree id, in input order. Each `(usize, usize)` entry is
/// `(index into paths, index into nodes)` of one match within that tree
pub matches: Vec<Vec<(usize, usize)>>,
}

/// Converts a [`Component`] to an [`OsString`].
Expand Down
2 changes: 1 addition & 1 deletion crates/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ pub use crate::{
FileType, ReadBackend, ReadSource, ReadSourceEntry, ReadSourceOpen, RepositoryBackends,
WriteBackend, ALL_FILE_TYPES,
},
blob::tree::TreeStreamerOptions as LsOptions,
blob::tree::{FindMatches, FindNode, TreeStreamerOptions as LsOptions},
commands::{
backup::{BackupOptions, ParentOptions},
check::CheckOptions,
Expand Down
36 changes: 35 additions & 1 deletion crates/core/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::{
FileType, ReadBackend, WriteBackend,
},
blob::{
tree::{NodeStreamer, TreeStreamerOptions as LsOptions},
tree::{FindMatches, FindNode, NodeStreamer, TreeStreamerOptions as LsOptions},
BlobType,
},
commands::{
Expand Down Expand Up @@ -1499,6 +1499,40 @@ impl<P, S: IndexedTree> Repository<P, S> {
pub fn node_from_path(&self, root_tree: Id, path: &Path) -> RusticResult<Node> {
    // `path` already is a `&Path`, so it is passed on as is (no `Path::new` round trip needed)
    Tree::node_from_path(self.dbe(), self.index(), root_tree, path)
}

/// Get all [`Node`]s from given root trees and a path
///
/// This forwards to `Tree::find_nodes_from_path` using the repository's backend and index.
///
/// # Arguments
///
/// * `ids` - The tree ids to search in
/// * `path` - The path to look up in each of the trees
///
/// # Errors
///
/// * if loading trees from the backend fails
/// * if a non-final component of `path` exists in a tree but is not a directory
///
/// # Returns
///
/// A [`FindNode`] with the distinct nodes found and, per given tree id, the index
/// of the node found for it (or `None` if the path does not exist in that tree)
pub fn find_nodes_from_path(
&self,
ids: impl IntoIterator<Item = Id>,
path: &Path,
) -> RusticResult<FindNode> {
Tree::find_nodes_from_path(self.dbe(), self.index(), ids, path)
}

/// Get all [`Node`]s/[`Path`]s from given root trees and a matching criterion
///
/// This forwards to `Tree::find_matching_nodes` using the repository's backend and index.
///
/// # Arguments
///
/// * `ids` - The tree ids to search in
/// * `matches` - The matching criterion; called with the path of a node and the node
///   itself, returns `true` if the node should be part of the result
///
/// # Errors
///
/// * if loading trees from the backend fails
/// * if a directory node has no subtree
///
/// # Returns
///
/// A [`FindMatches`] with the distinct paths and nodes found and, per given tree id,
/// the list of (path index, node index) pairs matched in that tree
pub fn find_matching_nodes(
&self,
ids: impl IntoIterator<Item = Id>,
matches: &impl Fn(&Path, &Node) -> bool,
) -> RusticResult<FindMatches> {
Tree::find_matching_nodes(self.dbe(), self.index(), ids, matches)
}
}

impl<P: ProgressBars, S: IndexedTree> Repository<P, S> {
Expand Down
Loading

0 comments on commit f3ad6e9

Please sign in to comment.