diff --git a/kvdb-rocksdb/src/iter.rs b/kvdb-rocksdb/src/iter.rs index a8239bef8..4ea9a9e92 100644 --- a/kvdb-rocksdb/src/iter.rs +++ b/kvdb-rocksdb/src/iter.rs @@ -17,11 +17,17 @@ //! This module contains an implementation of a RocksDB iterator //! wrapped inside a `RwLock`. Since `RwLock` "owns" the inner data, //! we're using `owning_ref` to work around the borrowing rules of Rust. +//! +//! Note: this crate does not use "Prefix Seek" mode which means that the prefix iterator +//! will return keys not starting with the given prefix as well (as long as `key >= prefix`). +//! To work around this we filter the data returned by rocksdb to ensure that +//! all data yielded by the iterator does start with the given prefix. +//! See https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes for details. use crate::DBAndColumns; use owning_ref::{OwningHandle, StableAddress}; use parking_lot::RwLockReadGuard; -use rocksdb::{DBIterator, IteratorMode}; +use rocksdb::{DBIterator, Direction, IteratorMode, ReadOptions}; use std::ops::{Deref, DerefMut}; /// A tuple holding key and value data, used as the iterator item type. @@ -78,22 +84,35 @@ pub trait IterationHandler { /// Create an `Iterator` over the default DB column or over a `ColumnFamily` if a column number /// is passed. - fn iter(&self, col: u32) -> Self::Iterator; + /// In addition to a read lock and a column index, it takes a ref to the same `ReadOptions` we + /// pass to the `get` method. + fn iter(&self, col: u32, read_opts: &ReadOptions) -> Self::Iterator; /// Create an `Iterator` over the default DB column or over a `ColumnFamily` if a column number /// is passed. The iterator starts from the first key having the provided `prefix`. - fn iter_from_prefix(&self, col: u32, prefix: &[u8]) -> Self::Iterator; + /// In addition to a read lock and a column index, it takes a ref to the same `ReadOptions` we + /// pass to the `get` method. + fn iter_from_prefix(&self, col: u32, prefix: &[u8], read_opts: &ReadOptions) -> Self::Iterator; } impl<'a, T> ReadGuardedIterator<'a, <&'a T as IterationHandler>::Iterator, T> where &'a T: IterationHandler, { - pub fn new(read_lock: RwLockReadGuard<'a, Option>, col: u32) -> Self { - Self { inner: Self::new_inner(read_lock, |db| db.iter(col)) } + /// Creates a new `ReadGuardedIterator` that maps `RwLock` to `RwLock`, + /// where `DBIterator` iterates over all keys. + pub fn new(read_lock: RwLockReadGuard<'a, Option>, col: u32, read_opts: &ReadOptions) -> Self { + Self { inner: Self::new_inner(read_lock, |db| db.iter(col, read_opts)) } } - pub fn new_from_prefix(read_lock: RwLockReadGuard<'a, Option>, col: u32, prefix: &[u8]) -> Self { - Self { inner: Self::new_inner(read_lock, |db| db.iter_from_prefix(col, prefix)) } + /// Creates a new `ReadGuardedIterator` that maps `RwLock` to `RwLock`, + /// where `DBIterator` iterates over keys >= prefix. + pub fn new_from_prefix( + read_lock: RwLockReadGuard<'a, Option>, + col: u32, + prefix: &[u8], + read_opts: &ReadOptions, + ) -> Self { + Self { inner: Self::new_inner(read_lock, |db| db.iter_from_prefix(col, prefix, read_opts)) } } fn new_inner( @@ -110,11 +129,15 @@ where impl<'a> IterationHandler for &'a DBAndColumns { type Iterator = DBIterator<'a>; - fn iter(&self, col: u32) -> Self::Iterator { - self.db.iterator_cf(self.cf(col as usize), IteratorMode::Start).expect("iterator params are valid; qed") + fn iter(&self, col: u32, read_opts: &ReadOptions) -> Self::Iterator { + self.db + .iterator_cf_opt(self.cf(col as usize), read_opts, IteratorMode::Start) + .expect("iterator params are valid; qed") } - fn iter_from_prefix(&self, col: u32, prefix: &[u8]) -> Self::Iterator { - self.db.prefix_iterator_cf(self.cf(col as usize), prefix).expect("iterator params are valid; qed") + fn iter_from_prefix(&self, col: u32, prefix: &[u8], read_opts: &ReadOptions) -> Self::Iterator { + self.db + .iterator_cf_opt(self.cf(col as usize), read_opts, IteratorMode::From(prefix, Direction::Forward)) + .expect("iterator params are valid; qed") } } diff --git a/kvdb-rocksdb/src/lib.rs b/kvdb-rocksdb/src/lib.rs index 760b9177e..a658902c9 100644 --- a/kvdb-rocksdb/src/lib.rs +++ b/kvdb-rocksdb/src/lib.rs @@ -520,7 +520,7 @@ impl Database { overlay_data }; - let guarded = iter::ReadGuardedIterator::new(read_lock, col); + let guarded = iter::ReadGuardedIterator::new(read_lock, col, &self.read_opts); Some(interleave_ordered(overlay_data, guarded)) } else { None @@ -534,12 +534,14 @@ impl Database { fn iter_from_prefix<'a>(&'a self, col: u32, prefix: &'a [u8]) -> impl Iterator + 'a { let read_lock = self.db.read(); let optional = if read_lock.is_some() { - let guarded = iter::ReadGuardedIterator::new_from_prefix(read_lock, col, prefix); + let guarded = iter::ReadGuardedIterator::new_from_prefix(read_lock, col, prefix, &self.read_opts); Some(interleave_ordered(Vec::new(), guarded)) } else { None }; - // workaround for https://github.com/facebook/rocksdb/issues/2343 + // We're not using "Prefix Seek" mode, so the iterator will return + // keys not starting with the given prefix as well, + // see https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes optional.into_iter().flat_map(identity).filter(move |(k, _)| k.starts_with(prefix)) }