Skip to content

Commit

Permalink
Removed needless allocation, split serialize into serialize and clear.
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Dec 22, 2022
1 parent ca0d361 commit dd27378
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 75 deletions.
6 changes: 5 additions & 1 deletion src/termdict/sstable_termdict/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub struct TermInfoReader {
impl ValueReader for TermInfoReader {
type Value = TermInfo;

/// Returns a reference to the `TermInfo` stored at position `idx` in
/// `self.term_infos`.
///
/// Uses plain indexing with no bounds check of its own; presumably an
/// out-of-range `idx` panics (the container type is not visible here).
#[inline(always)]
fn value(&self, idx: usize) -> &TermInfo {
&self.term_infos[idx]
}
Expand Down Expand Up @@ -86,7 +87,7 @@ impl ValueWriter for TermInfoWriter {
self.term_infos.push(term_info.clone());
}

fn serialize_block(&mut self, buffer: &mut Vec<u8>) {
fn serialize_block(&self, buffer: &mut Vec<u8>) {
VInt(self.term_infos.len() as u64).serialize_into_vec(buffer);
if self.term_infos.is_empty() {
return;
Expand All @@ -98,6 +99,9 @@ impl ValueWriter for TermInfoWriter {
VInt(term_info.postings_range.len() as u64).serialize_into_vec(buffer);
VInt(term_info.positions_range.len() as u64).serialize_into_vec(buffer);
}
}

/// Empties `self.term_infos`, discarding the values accumulated so far.
///
/// Kept separate from `serialize_block`, which takes `&self` and therefore
/// leaves the accumulated values in place.
fn clear(&mut self) {
self.term_infos.clear();
}
}
Expand Down
8 changes: 4 additions & 4 deletions sstable/benches/stream_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use common::file_slice::FileSlice;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tantivy_sstable::{self, Dictionary, SSTableMonotonicU64};
use tantivy_sstable::{self, Dictionary, MonotonicU64SSTable};

const CHARSET: &'static [u8] = b"abcdefghij";

Expand All @@ -19,13 +19,13 @@ fn generate_key(rng: &mut impl Rng) -> String {
.collect()
}

fn prepare_sstable() -> io::Result<Dictionary<SSTableMonotonicU64>> {
fn prepare_sstable() -> io::Result<Dictionary<MonotonicU64SSTable>> {
let mut rng = StdRng::from_seed([3u8; 32]);
let mut els = BTreeSet::new();
while els.len() < 100_000 {
els.insert(generate_key(&mut rng));
}
let mut dictionary_builder = Dictionary::<SSTableMonotonicU64>::builder(Vec::new())?;
let mut dictionary_builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new())?;
for (ord, word) in els.iter().enumerate() {
dictionary_builder.insert(word, &(ord as u64))?;
}
Expand All @@ -35,7 +35,7 @@ fn prepare_sstable() -> io::Result<Dictionary<SSTableMonotonicU64>> {
}

fn stream_bench(
dictionary: &Dictionary<SSTableMonotonicU64>,
dictionary: &Dictionary<MonotonicU64SSTable>,
lower: &[u8],
upper: &[u8],
do_scan: bool,
Expand Down
8 changes: 6 additions & 2 deletions sstable/src/block_reader.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::io;
use std::ops::Range;

pub struct BlockReader<'a> {
buffer: Vec<u8>,
Expand Down Expand Up @@ -29,8 +30,8 @@ impl<'a> BlockReader<'a> {
}

#[inline(always)]
pub fn buffer_from_to(&self, start: usize, end: usize) -> &[u8] {
&self.buffer[start..end]
pub fn buffer_from_to(&self, range: Range<usize>) -> &[u8] {
&self.buffer[range]
}

pub fn read_block(&mut self) -> io::Result<bool> {
Expand All @@ -51,14 +52,17 @@ impl<'a> BlockReader<'a> {
Ok(true)
}

/// Returns the current position within the block buffer, i.e. the value of
/// the `offset` field.
///
/// `buffer()` slices the underlying buffer starting at this position.
#[inline(always)]
pub fn offset(&self) -> usize {
self.offset
}

/// Moves the current position (`offset`) forward by `num_bytes`.
///
/// No check against the buffer length is performed here.
#[inline(always)]
pub fn advance(&mut self, num_bytes: usize) {
self.offset += num_bytes;
}

/// Returns the bytes of the internal buffer from the current position
/// (`offset`) to its end.
///
/// Panics if `offset` is greater than the buffer length.
#[inline(always)]
pub fn buffer(&self) -> &[u8] {
&self.buffer[self.offset..]
}
Expand Down
27 changes: 14 additions & 13 deletions sstable/src/delta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ where W: io::Write
block: Vec<u8>,
write: CountingWriter<BufWriter<W>>,
value_writer: TValueWriter,
// Only here to avoid allocations.
stateless_buffer: Vec<u8>,
}

impl<W, TValueWriter> DeltaWriter<W, TValueWriter>
Expand All @@ -28,6 +30,7 @@ where
block: Vec::with_capacity(BLOCK_LEN * 2),
write: CountingWriter::wrap(BufWriter::new(wrt)),
value_writer: TValueWriter::default(),
stateless_buffer: Vec::new(),
}
}
}
Expand All @@ -42,15 +45,16 @@ where
return Ok(None);
}
let start_offset = self.write.written_bytes() as usize;
// TODO avoid buffer allocation
let mut buffer = Vec::new();
self.value_writer.serialize_block(&mut buffer);
let buffer: &mut Vec<u8> = &mut self.stateless_buffer;
self.value_writer.serialize_block(buffer);
self.value_writer.clear();
let block_len = buffer.len() + self.block.len();
self.write.write_all(&(block_len as u32).to_le_bytes())?;
self.write.write_all(&buffer[..])?;
self.write.write_all(&self.block[..])?;
let end_offset = self.write.written_bytes() as usize;
self.block.clear();
buffer.clear();
Ok(Some(start_offset..end_offset))
}

Expand Down Expand Up @@ -91,8 +95,7 @@ where

pub struct DeltaReader<'a, TValueReader> {
common_prefix_len: usize,
suffix_start: usize,
suffix_end: usize,
suffix_range: Range<usize>,
value_reader: TValueReader,
block_reader: BlockReader<'a>,
idx: usize,
Expand All @@ -105,8 +108,7 @@ where TValueReader: value::ValueReader
DeltaReader {
idx: 0,
common_prefix_len: 0,
suffix_start: 0,
suffix_end: 0,
suffix_range: 0..0,
value_reader: TValueReader::default(),
block_reader: BlockReader::new(Box::new(reader)),
}
Expand Down Expand Up @@ -148,8 +150,8 @@ where TValueReader: value::ValueReader
return false;
};
self.common_prefix_len = keep;
self.suffix_start = self.block_reader.offset();
self.suffix_end = self.suffix_start + add;
let suffix_start = self.block_reader.offset();
self.suffix_range = suffix_start..(suffix_start + add);
self.block_reader.advance(add);
true
}
Expand Down Expand Up @@ -178,8 +180,7 @@ where TValueReader: value::ValueReader

#[inline(always)]
pub fn suffix(&self) -> &[u8] {
self.block_reader
.buffer_from_to(self.suffix_start, self.suffix_end)
self.block_reader.buffer_from_to(self.suffix_range.clone())
}

#[inline(always)]
Expand All @@ -191,11 +192,11 @@ where TValueReader: value::ValueReader
#[cfg(test)]
mod tests {
use super::DeltaReader;
use crate::value::U64MonotonicReader;
use crate::value::U64MonotonicValueReader;

#[test]
fn test_empty() {
let mut delta_reader: DeltaReader<U64MonotonicReader> = DeltaReader::empty();
let mut delta_reader: DeltaReader<U64MonotonicValueReader> = DeltaReader::empty();
assert!(!delta_reader.advance().unwrap());
}
}
53 changes: 36 additions & 17 deletions sstable/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ mod block_reader;
pub use self::block_reader::BlockReader;
pub use self::delta::{DeltaReader, DeltaWriter};
pub use self::merge::VoidMerge;
use self::value::{U64MonotonicReader, U64MonotonicWriter, ValueReader, ValueWriter};
use crate::value::{RangeReader, RangeWriter};
use self::value::{U64MonotonicValueReader, U64MonotonicValueWriter, ValueReader, ValueWriter};
use crate::value::{RangeValueReader, RangeValueWriter};

pub type TermOrdinal = u64;

Expand Down Expand Up @@ -87,31 +87,47 @@ pub struct VoidSSTable;

impl SSTable for VoidSSTable {
type Value = ();
type ValueReader = value::VoidReader;
type ValueWriter = value::VoidWriter;
type ValueReader = value::VoidValueReader;
type ValueWriter = value::VoidValueWriter;
}

/// SSTable associating keys to `u64` values
/// sorted in the same order as the keys.
///
/// In other words, two keys `k1` and `k2`
/// such that `k1 <= k2` are required to observe
/// `sstable[k1] <= sstable[k2]`.
#[allow(dead_code)]
pub struct SSTableMonotonicU64;
pub struct MonotonicU64SSTable;

impl SSTable for SSTableMonotonicU64 {
impl SSTable for MonotonicU64SSTable {
type Value = u64;

type ValueReader = U64MonotonicReader;
type ValueReader = U64MonotonicValueReader;

type ValueWriter = U64MonotonicWriter;
type ValueWriter = U64MonotonicValueWriter;
}

pub struct SSTableRange;

impl SSTable for SSTableRange {
/// SSTable associating keys to ranges.
/// The ranges are required to partition the
/// space.
///
/// In other words, two consecutive keys `k1` and `k2`
/// are required to observe
/// `range_sstable[k1].end == range_sstable[k2].start`.
///
/// The first range is not required to start at `0`.
pub struct RangeSSTable;

impl SSTable for RangeSSTable {
type Value = Range<u64>;

type ValueReader = RangeReader;
type ValueReader = RangeValueReader;

type ValueWriter = RangeWriter;
type ValueWriter = RangeValueWriter;
}

/// SSTable reader.
pub struct Reader<'a, TValueReader> {
key: Vec<u8>,
delta_reader: DeltaReader<'a, TValueReader>,
Expand All @@ -132,16 +148,19 @@ where TValueReader: ValueReader
Ok(true)
}

/// Returns the bytes currently held in the reader's `key` buffer
/// (presumably the key of the entry reached by the last `advance()`).
#[inline(always)]
pub fn key(&self) -> &[u8] {
&self.key
}

/// Returns the value exposed by the underlying `DeltaReader`; this simply
/// forwards to `delta_reader.value()`.
#[inline(always)]
pub fn value(&self) -> &TValueReader::Value {
self.delta_reader.value()
}
}

/// Lets a `Reader` be used wherever a byte slice is expected: the slice is
/// the content of the reader's `key` field.
impl<'a, TValueReader> AsRef<[u8]> for Reader<'a, TValueReader> {
/// Returns the reader's `key` bytes.
#[inline(always)]
fn as_ref(&self) -> &[u8] {
&self.key
}
Expand Down Expand Up @@ -281,7 +300,7 @@ mod test {
use std::io;
use std::ops::Bound;

use super::{common_prefix_len, SSTable, SSTableMonotonicU64, VoidMerge, VoidSSTable};
use super::{common_prefix_len, MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable};

fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) {
assert_eq!(
Expand Down Expand Up @@ -405,12 +424,12 @@ mod test {
#[test]
fn test_sstable_u64() -> io::Result<()> {
let mut buffer = Vec::new();
let mut writer = SSTableMonotonicU64::writer(&mut buffer);
let mut writer = MonotonicU64SSTable::writer(&mut buffer);
writer.insert(b"abcd", &1u64)?;
writer.insert(b"abe", &4u64)?;
writer.insert(b"gogo", &4324234234234234u64)?;
writer.finish()?;
let mut reader = SSTableMonotonicU64::reader(&buffer[..]);
let mut reader = MonotonicU64SSTable::reader(&buffer[..]);
assert!(reader.advance()?);
assert_eq!(reader.key(), b"abcd");
assert_eq!(reader.value(), &1u64);
Expand All @@ -426,7 +445,7 @@ mod test {

#[test]
fn test_sstable_empty() {
let mut sstable_range_empty = crate::SSTableRange::create_empty_reader();
let mut sstable_range_empty = crate::RangeSSTable::create_empty_reader();
assert!(!sstable_range_empty.advance().unwrap());
}

Expand Down
8 changes: 4 additions & 4 deletions sstable/src/merge/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ mod tests {
use std::collections::{BTreeMap, BTreeSet};
use std::str;

use super::super::{SSTable, SSTableMonotonicU64, VoidSSTable};
use super::super::{MonotonicU64SSTable, SSTable, VoidSSTable};
use super::{U64Merge, VoidMerge};

fn write_sstable(keys: &[&'static str]) -> Vec<u8> {
Expand All @@ -89,7 +89,7 @@ mod tests {
fn write_sstable_u64(keys: &[(&'static str, u64)]) -> Vec<u8> {
let mut buffer: Vec<u8> = vec![];
{
let mut sstable_writer = SSTableMonotonicU64::writer(&mut buffer);
let mut sstable_writer = MonotonicU64SSTable::writer(&mut buffer);
for (key, val) in keys {
assert!(sstable_writer.insert(key.as_bytes(), val).is_ok());
}
Expand Down Expand Up @@ -132,8 +132,8 @@ mod tests {
}
}
let mut w = Vec::new();
assert!(SSTableMonotonicU64::merge(sstables_ref, &mut w, U64Merge).is_ok());
let mut reader = SSTableMonotonicU64::reader(&w[..]);
assert!(MonotonicU64SSTable::merge(sstables_ref, &mut w, U64Merge).is_ok());
let mut reader = MonotonicU64SSTable::reader(&w[..]);
for (k, v) in merged {
assert!(reader.advance().unwrap());
assert_eq!(reader.key(), k.as_bytes());
Expand Down
6 changes: 3 additions & 3 deletions sstable/src/streamer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,10 @@ mod tests {

use common::OwnedBytes;

use crate::{Dictionary, SSTableMonotonicU64};
use crate::{Dictionary, MonotonicU64SSTable};

fn create_test_dictionary() -> io::Result<Dictionary<SSTableMonotonicU64>> {
let mut dict_builder = Dictionary::<SSTableMonotonicU64>::builder(Vec::new())?;
fn create_test_dictionary() -> io::Result<Dictionary<MonotonicU64SSTable>> {
let mut dict_builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new())?;
dict_builder.insert(b"abaisance", &0)?;
dict_builder.insert(b"abalation", &1)?;
dict_builder.insert(b"abalienate", &2)?;
Expand Down
15 changes: 11 additions & 4 deletions sstable/src/value/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ pub trait ValueReader: Default {
fn load(&mut self, data: &[u8]) -> io::Result<usize>;
}

/// `ValueWriter` is a trait that makes it possible to write blocks
/// of values.
pub trait ValueWriter: Default {
/// Type of the value being written.
type Value;
Expand All @@ -30,12 +32,16 @@ pub trait ValueWriter: Default {
fn write(&mut self, val: &Self::Value);

/// Serializes the accumulated values into the output buffer.
fn serialize_block(&mut self, output: &mut Vec<u8>);
fn serialize_block(&self, output: &mut Vec<u8>);

/// Clears the `ValueWriter`. After a call to clear, the `ValueWriter`
/// should behave like a fresh `ValueWriter::default()`.
fn clear(&mut self);
}

pub use range::{RangeReader, RangeWriter};
pub use u64_monotonic::{U64MonotonicReader, U64MonotonicWriter};
pub use void::{VoidReader, VoidWriter};
pub use range::{RangeValueReader, RangeValueWriter};
pub use u64_monotonic::{U64MonotonicValueReader, U64MonotonicValueWriter};
pub use void::{VoidValueReader, VoidValueWriter};

fn deserialize_vint_u64(data: &mut &[u8]) -> u64 {
let (num_bytes, val) = super::vint::deserialize_read(data);
Expand Down Expand Up @@ -63,6 +69,7 @@ pub(crate) mod tests {
writer.write(value);
}
writer.serialize_block(&mut buffer);
writer.clear();
}
let data_len = buffer.len();
buffer.extend_from_slice(&b"extradata"[..]);
Expand Down
Loading

0 comments on commit dd27378

Please sign in to comment.