Skip to content

Commit

Permalink
WIP: disktree iteration
Browse files Browse the repository at this point in the history
  • Loading branch information
JayKickliter committed Nov 2, 2023
1 parent 09d892c commit d73f7ba
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 1 deletion.
204 changes: 204 additions & 0 deletions src/disktree/iter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
use crate::{
cell::CellStack,
disktree::{dptr, tree::HDR_SZ},
error::Result,
Cell,
};
use byteorder::ReadBytesExt;
use std::io::{Read, Seek, SeekFrom};

/// Decodes a value out of a reader of type `R`.
///
/// A blanket implementation is provided for every `Fn(&mut R) -> T`
/// where `R: Read`, so plain closures and functions can be used
/// wherever a `Reader` is expected.
pub trait Reader<R> {
    /// What [`Reader::read`] hands back. This may be the
    /// deserialized value itself, or a `Result` wrapping it when
    /// decoding is fallible.
    type T;

    /// Decodes one value from `rdr`.
    fn read(&self, rdr: &mut R) -> Self::T;
}

impl<R: Read, Out, Func> Reader<R> for Func
where
    Func: Fn(&mut R) -> Out,
{
    type T = Out;

    fn read(&self, rdr: &mut R) -> Self::T {
        // Delegate straight to the wrapped callable.
        (self)(rdr)
    }
}

/// Depth-first iterator over the leaves of an on-disk tree.
///
/// Nodes are read lazily from `rdr`; each yielded item pairs the
/// leaf's cell with whatever `f` produces when handed the reader
/// positioned at the leaf's value bytes.
pub(crate) struct Iter<'a, R, F> {
// Digits of the path from the base cell down to the node currently
// being visited; used to build the `Cell` returned with each leaf.
cell_stack: CellStack,
// `(digit, dptr)` of the next node to read, or `None` when the
// iterator has to backtrack through `stack` to find one.
curr: Option<(u8, u64)>,
// Source the tree is read from.
rdr: &'a mut R,
// Emptied sibling buffers kept around so their allocations can be
// reused for later parent nodes.
recycle_bin: Vec<Vec<(u8, u64)>>,
// One entry per tree level on the current path: the not-yet-visited
// siblings at that level as `(digit, dptr)` pairs.
stack: Vec<Vec<(u8, u64)>>,
// User-supplied value reader invoked once per leaf.
f: F,
}

/// A node as decoded by `Iter::read_node`.
enum Node {
// File position of the first byte of value data (the byte
// immediately following the node's tag).
Leaf(u64),
// One `(H3 cell digit, file position of the child's node tag)` pair
// per child present in the parent's tag.
Parent(Vec<(u8, u64)>),
}

impl<'a, R, F> Iter<'a, R, F>
where
    R: Seek + Read,
{
    /// Moves the underlying reader to the absolute file offset `pos`.
    fn seek_to(&mut self, pos: u64) -> Result {
        self.rdr.seek(SeekFrom::Start(pos))?;
        Ok(())
    }

    /// Reads the base-cell pointer table that starts `HDR_SZ` bytes
    /// into the file.
    ///
    /// Returns a `(base cell digit, dptr)` pair for every non-null
    /// slot. The list is reversed before being returned so that
    /// `Vec::pop` hands out the lowest base cell first.
    fn read_base_nodes(rdr: &mut R) -> Result<Vec<(u8, u64)>> {
        // Number of slots in the base table (H3 defines 122 base cells).
        const BASE_CELL_COUNT: u8 = 122;

        let mut buf = Vec::new();
        rdr.seek(SeekFrom::Start(HDR_SZ))?;
        for digit in 0..BASE_CELL_COUNT {
            let dptr = dptr::read(rdr)?;
            if dptr != dptr::DPTR_NULL {
                buf.push((digit, dptr))
            }
        }
        buf.reverse();
        Ok(buf)
    }

    /// Decodes the node whose tag byte lives at file position `dptr`.
    ///
    /// A tag of `0` denotes a leaf; the returned position is that of
    /// the first value byte. A tag with the high bit set plus at
    /// least one of the low seven bits denotes a parent; bit `n`
    /// (0..=6) flags the presence of child digit `n`, and the child
    /// pointers are stored right after the tag in ascending digit
    /// order. Children are returned in descending digit order so
    /// that `Vec::pop` yields the lowest digit first.
    ///
    /// # Errors
    ///
    /// Returns an error on I/O failure, or an `InvalidData` I/O error
    /// (converted into the crate error type) when the tag is neither
    /// a leaf tag nor a parent tag with at least one child.
    fn read_node(&mut self, dptr: u64) -> Result<Node> {
        // High bit of the tag marks a parent node; it is NOT a child digit.
        const PARENT_FLAG: u8 = 0b1000_0000;
        // Child digits occupy the low seven bits of the tag.
        const CHILD_DIGITS: u8 = 7;

        self.seek_to(dptr)?;
        let node_tag = self.rdr.read_u8()?;
        let base_pos = self.rdr.stream_position()?;
        if node_tag == 0 {
            return Ok(Node::Leaf(base_pos));
        }
        if node_tag <= PARENT_FLAG {
            // Corrupt input is reported to the caller rather than
            // aborting the process.
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("invalid disktree node tag {node_tag:#010b} at offset {dptr}"),
            )
            .into());
        }

        let mut buf = self.node_buf();
        for digit in (0..CHILD_DIGITS).rev() {
            let digit_bit = 1u8 << digit;
            if node_tag & digit_bit != 0 {
                // The child's slot index equals the number of
                // present children with a smaller digit.
                let preceding = (node_tag & (digit_bit - 1)).count_ones();
                let child_dptr_pos = base_pos + u64::from(preceding) * dptr::DPTR_SZ as u64;
                self.seek_to(child_dptr_pos)?;
                let child_dptr = dptr::read(&mut self.rdr)?;
                buf.push((digit, child_dptr));
            }
        }
        Ok(Node::Parent(buf))
    }

    /// Hands out an empty buffer, reusing a recycled allocation when
    /// one is available.
    fn node_buf(&mut self) -> Vec<(u8, u64)> {
        let buf = self.recycle_bin.pop().unwrap_or_default();
        assert!(buf.is_empty());
        buf
    }

    /// Stores an exhausted (empty) buffer for later reuse by
    /// `node_buf`.
    fn recycle_buf(&mut self, buf: Vec<(u8, u64)>) {
        assert!(buf.is_empty());
        self.recycle_bin.push(buf);
    }

    /// Creates an iterator over the tree readable through `rdr`,
    /// using `f` to decode each leaf's value.
    ///
    /// The base-cell table is read eagerly; the first non-null base
    /// cell (if any) becomes the starting node.
    ///
    /// # Errors
    ///
    /// Fails if the base-cell table cannot be read.
    pub(crate) fn new(rdr: &'a mut R, f: F) -> Result<Iter<'a, R, F>> {
        let mut cell_stack = CellStack::new();
        let mut stack = Vec::new();
        let recycle_bin = Vec::new();
        let mut base_nodes = Self::read_base_nodes(rdr)?;
        let curr = base_nodes.pop();
        stack.push(base_nodes);
        if let Some((digit, _)) = curr {
            cell_stack.push(digit);
        }
        Ok(Self {
            cell_stack,
            curr,
            rdr,
            recycle_bin,
            stack,
            f,
        })
    }
}

impl<'a, R, F> Iterator for Iter<'a, R, F>
where
    R: Read + Seek,
    F: Reader<R>,
{
    type Item = Result<(Cell, F::T)>;

    /// Advances to the next leaf and yields its cell together with
    /// the result of running the value reader on it.
    ///
    /// Yields `Some(Err(_))` once if a node cannot be read or the
    /// reader cannot be positioned on the value; after such an error
    /// the iterator is exhausted and only returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Backtrack: find the nearest level that still has an
            // unvisited sibling. Running out of levels means the
            // whole tree has been visited.
            while self.curr.is_none() {
                let mut siblings = self.stack.pop()?;
                self.cell_stack.pop();
                if let Some((digit, dptr)) = siblings.pop() {
                    self.cell_stack.push(digit);
                    self.curr = Some((digit, dptr));
                    self.stack.push(siblings);
                } else {
                    self.recycle_buf(siblings);
                }
            }

            // Descend: follow the lowest remaining child at every
            // level until a leaf is reached.
            while let Some((digit, dptr)) = self.curr {
                self.cell_stack.swap(digit);
                match self.read_node(dptr) {
                    Ok(Node::Parent(mut children)) => {
                        if let Some((digit, dptr)) = children.pop() {
                            self.cell_stack.push(digit);
                            self.curr = Some((digit, dptr));
                            self.stack.push(children);
                        } else {
                            // A childless parent has nothing to
                            // descend into; drop it and backtrack
                            // instead of re-reading it forever.
                            self.recycle_buf(children);
                            self.curr = None;
                        }
                    }
                    Ok(Node::Leaf(value_pos)) => {
                        self.curr = None;
                        if let Err(e) = self.seek_to(value_pos) {
                            self.stack.clear();
                            return Some(Err(e));
                        }
                        let cell = *self.cell_stack.cell().expect("corrupted cell-stack");
                        return Some(Ok((cell, self.f.read(self.rdr))));
                    }
                    Err(e) => {
                        // Stop iterating; otherwise every later call
                        // would retry the same node and repeat the
                        // same error indefinitely.
                        self.stack.clear();
                        self.curr = None;
                        return Some(Err(e));
                    }
                }
            }
        }
    }
}
44 changes: 44 additions & 0 deletions src/disktree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
pub use tree::DiskTree;

mod dptr;
mod iter;
mod tree;
mod writer;

Expand All @@ -11,6 +12,7 @@ mod tests {
use super::*;
use byteorder::{LittleEndian as LE, ReadBytesExt};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;

#[test]
fn test_roundtrip_monaco() {
Expand Down Expand Up @@ -62,4 +64,46 @@ mod tests {
assert_eq!(ht_cell, dt_cell);
}
}

/// Writes the Monaco cell set to a disktree and checks that both
/// point lookups and full iteration return every cell with its value.
#[test]
fn test_iter() {
    use crate::{Cell, HexTreeMap};
    let idx_bytes = include_bytes!("../../assets/monaco.res12.h3idx");
    let rdr = &mut idx_bytes.as_slice();
    let mut cells = Vec::new();
    while let Ok(idx) = rdr.read_u64::<LE>() {
        cells.push(Cell::from_raw(idx).unwrap());
    }

    // Map every cell to itself, so each stored value identifies the
    // key it belongs to.
    let mut monaco = HexTreeMap::new();
    monaco.extend(cells.iter().copied().zip(cells.iter().copied()));

    // Anonymous temporary file: nothing is left behind on disk once
    // the handle is dropped.
    let mut file = tempfile::tempfile().unwrap();
    monaco
        .to_disktree(&mut file, |wtr, val| bincode::serialize_into(wtr, val))
        .unwrap();
    let mut monaco_disktree = DiskTree::from_reader(file).unwrap();

    // Point lookups must agree with the in-memory map.
    for (tree_cell, tree_val) in monaco.iter() {
        let (disk_cell, rdr) = monaco_disktree.seek_to_cell(tree_cell).unwrap().unwrap();
        assert_eq!(tree_cell, disk_cell);
        let disk_val: Cell = bincode::deserialize_from(rdr).unwrap();
        assert_eq!(*tree_val, disk_val);
    }

    // Iteration must yield each cell paired with its own value, and
    // must visit as many leaves as the in-memory map holds.
    let reader = |rdr: &mut std::fs::File| bincode::deserialize_from(rdr);
    let mut visited = 0_usize;
    for item_res in monaco_disktree.iter(reader).unwrap() {
        let (cell, value) = item_res.unwrap();
        let value: u64 = value.unwrap();
        let val_cell = Cell::try_from(value).unwrap();
        assert_eq!(cell, val_cell);
        visited += 1;
    }
    assert_eq!(visited, monaco.iter().count());
}
}
20 changes: 19 additions & 1 deletion src/disktree/tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use crate::{
digits::Digits,
disktree::dptr::{self, DPTR_NULL, DPTR_SZ},
disktree::{
dptr::{self, DPTR_NULL, DPTR_SZ},
iter::{Iter, Reader},
},
error::{Error, Result},
Cell,
};
Expand Down Expand Up @@ -75,6 +78,21 @@ impl<R: Read + Seek> DiskTree<R> {
.map(|opt| opt.is_some())
}

/// Returns an iterator over every cell stored in the tree, in an
/// unspecified order.
///
/// Values are not decoded by the tree itself. For each leaf the
/// underlying reader is positioned at the start of the leaf's value
/// bytes and passed to `f`; the iterator yields the cell together
/// with whatever `f` returns.
///
/// # Errors
///
/// Fails if the iterator cannot be set up; individual items are
/// `Err` when a node cannot be read.
pub fn iter<'a, F>(
    &'a mut self,
    f: F,
) -> Result<impl Iterator<Item = Result<(Cell, F::T)>> + 'a>
where
    F: Reader<R> + 'a,
{
    let rdr = &mut self.0;
    Iter::new(rdr, f)
}

/// Leaf: | 0_u8 | bincode bytes |
/// Parent: | 1_u8 | Dptr | Dptr | Dptr | Dptr | Dptr | Dptr | Dptr |
fn _get(
Expand Down

0 comments on commit d73f7ba

Please sign in to comment.