-
-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add bulk writing to indexer with custom memory directory
- Loading branch information
1 parent
78af882
commit d782e01
Showing
9 changed files
with
265 additions
and
22 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
use std::collections::BTreeMap; | ||
use std::fmt::{Debug, Formatter}; | ||
use std::mem; | ||
use std::ops::Deref; | ||
use std::path::{Path, PathBuf}; | ||
use std::sync::Arc; | ||
|
||
use bytes::{Bytes, BytesMut}; | ||
use parking_lot::Mutex; | ||
use tantivy::directory::error::{DeleteError, OpenReadError, OpenWriteError}; | ||
use tantivy::directory::{ | ||
AntiCallToken, | ||
FileHandle, | ||
OwnedBytes, | ||
TerminatingWrite, | ||
WatchCallback, | ||
WatchCallbackList, | ||
WatchHandle, | ||
WritePtr, | ||
}; | ||
|
||
type State = Arc<Mutex<BTreeMap<PathBuf, Bytes>>>; | ||
|
||
#[derive(Clone, Default)] | ||
/// A [tantivy::Directory] implementation that holds all data in memory. | ||
/// | ||
/// Unlike [tantivy::directory::RamDirectory], this directory allows you to | ||
/// retrieve the internal [Bytes] that make up the object allowing us to | ||
/// avoid some additional copies when writing segments. | ||
pub struct MemoryDirectory { | ||
inner: State, | ||
watch_callbacks: Arc<WatchCallbackList>, | ||
} | ||
|
||
impl MemoryDirectory { | ||
/// Gets the bytes for a file with the given path. | ||
pub fn get(&self, path: &Path) -> Result<Bytes, OpenReadError> { | ||
self.inner | ||
.lock() | ||
.get(path) | ||
.cloned() | ||
.ok_or_else(|| OpenReadError::FileDoesNotExist(path.to_path_buf())) | ||
} | ||
} | ||
|
||
impl Debug for MemoryDirectory { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||
write!(f, "MemoryDirectory") | ||
} | ||
} | ||
|
||
impl tantivy::Directory for MemoryDirectory { | ||
fn get_file_handle( | ||
&self, | ||
path: &Path, | ||
) -> Result<Arc<dyn FileHandle>, OpenReadError> { | ||
let data = self.get(path)?; | ||
let handle = OwnedBytes::new(BytesWrapper(data)); | ||
Ok(Arc::new(handle)) | ||
} | ||
|
||
fn delete(&self, path: &Path) -> Result<(), DeleteError> { | ||
self.inner.lock().remove(path); | ||
Ok(()) | ||
} | ||
|
||
fn exists(&self, path: &Path) -> Result<bool, OpenReadError> { | ||
Ok(self.inner.lock().contains_key(path)) | ||
} | ||
|
||
fn open_write(&self, path: &Path) -> Result<WritePtr, OpenWriteError> { | ||
let writer = MemoryWriter { | ||
path: path.to_path_buf(), | ||
state: self.inner.clone(), | ||
inner_buffer: BytesMut::with_capacity(8 << 10), | ||
}; | ||
Ok(WritePtr::with_capacity(2 << 10, Box::new(writer))) | ||
} | ||
|
||
fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> { | ||
let data = self.get(path)?; | ||
Ok(data.to_vec()) | ||
} | ||
|
||
fn atomic_write(&self, path: &Path, data: &[u8]) -> std::io::Result<()> { | ||
self.inner | ||
.lock() | ||
.insert(path.to_path_buf(), Bytes::copy_from_slice(data)); | ||
Ok(()) | ||
} | ||
|
||
fn sync_directory(&self) -> std::io::Result<()> { | ||
self.watch_callbacks.broadcast(); | ||
Ok(()) | ||
} | ||
|
||
fn watch(&self, watch_callback: WatchCallback) -> tantivy::Result<WatchHandle> { | ||
Ok(self.watch_callbacks.subscribe(watch_callback)) | ||
} | ||
} | ||
|
||
struct MemoryWriter { | ||
path: PathBuf, | ||
inner_buffer: BytesMut, | ||
state: State, | ||
} | ||
|
||
impl std::io::Write for MemoryWriter { | ||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { | ||
self.inner_buffer.extend_from_slice(buf); | ||
Ok(buf.len()) | ||
} | ||
|
||
fn flush(&mut self) -> std::io::Result<()> { | ||
Ok(()) | ||
} | ||
} | ||
|
||
impl TerminatingWrite for MemoryWriter { | ||
fn terminate_ref(&mut self, _: AntiCallToken) -> std::io::Result<()> { | ||
let buffer = mem::take(&mut self.inner_buffer); | ||
self.state.lock().insert(self.path.clone(), buffer.freeze()); | ||
Ok(()) | ||
} | ||
} | ||
|
||
struct BytesWrapper(Bytes); | ||
|
||
impl Deref for BytesWrapper { | ||
type Target = [u8]; | ||
|
||
#[inline] | ||
fn deref(&self) -> &Self::Target { | ||
self.0.as_ref() | ||
} | ||
} | ||
|
||
unsafe impl stable_deref_trait::StableDeref for BytesWrapper {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
mod memory; | ||
|
||
pub use self::memory::MemoryDirectory; |
Oops, something went wrong.