From cfb2ec2b36d3610080c860dd3b6443e2db75eae0 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 19 Jan 2023 16:55:01 +0900 Subject: [PATCH] Add support for multivalues (#1809) --- columnar/src/column/dictionary_encoded.rs | 26 +++++++------ columnar/src/column/mod.rs | 32 +++++++++------ columnar/src/column/serialize.rs | 15 +++++-- columnar/src/column_index/mod.rs | 27 +++++++++---- .../src/column_index/multivalued_index.rs | 8 ++-- columnar/src/column_index/serialize.rs | 9 +++-- columnar/src/column_values/mod.rs | 19 ++------- columnar/src/column_values/serialize.rs | 23 ----------- columnar/src/columnar/merge.rs | 6 +-- columnar/src/columnar/mod.rs | 1 + columnar/src/columnar/writer/mod.rs | 26 ++++++------- columnar/src/columnar/writer/serializer.rs | 1 - columnar/src/columnar/writer/value_index.rs | 16 ++------ columnar/src/lib.rs | 4 +- columnar/src/tests.rs | 39 +++++++++++++++---- 15 files changed, 131 insertions(+), 121 deletions(-) diff --git a/columnar/src/column/dictionary_encoded.rs b/columnar/src/column/dictionary_encoded.rs index 8020e43143..b650ba6986 100644 --- a/columnar/src/column/dictionary_encoded.rs +++ b/columnar/src/column/dictionary_encoded.rs @@ -8,6 +8,13 @@ use crate::column::Column; use crate::RowId; /// Dictionary encoded column. +/// +/// The column simply gives access to a regular u64-column that, in +/// which the values are term-ordinals. +/// +/// These ordinals are ids uniquely identify the bytes that are stored in +/// the column. These ordinals are small, and sorted in the same order +/// as the term_ord_column. #[derive(Clone)] pub struct BytesColumn { pub(crate) dictionary: Arc>, @@ -15,17 +22,21 @@ pub struct BytesColumn { } impl BytesColumn { + /// Fills the given `output` buffer with the term associated to the ordinal `ord`. + /// /// Returns `false` if the term does not exist (e.g. `term_ord` is greater or equal to the /// overll number of terms). - pub fn ord_to_bytes(&self, term_ord: u64, output: &mut Vec) -> io::Result { - self.dictionary.ord_to_term(term_ord, output) + pub fn ord_to_bytes(&self, ord: u64, output: &mut Vec) -> io::Result { + self.dictionary.ord_to_term(ord, output) } + /// Returns the number of rows in the column. pub fn num_rows(&self) -> RowId { self.term_ord_column.num_rows() } - pub fn term_ords(&self) -> &Column { + /// Returns the column of ordinals + pub fn ords(&self) -> &Column { &self.term_ord_column } } @@ -40,6 +51,7 @@ impl From for StrColumn { } impl StrColumn { + /// Fills the buffer pub fn ord_to_str(&self, term_ord: u64, output: &mut String) -> io::Result { unsafe { let buf = output.as_mut_vec(); @@ -55,14 +67,6 @@ impl StrColumn { } Ok(true) } - - pub fn num_rows(&self) -> RowId { - self.term_ord_column.num_rows() - } - - pub fn ordinal_dictionary(&self) -> &Column { - &self.0.term_ord_column - } } impl Deref for StrColumn { diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index 37087e9648..60ecd3915b 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -5,11 +5,11 @@ use std::ops::Deref; use std::sync::Arc; use common::BinarySerializable; +pub use dictionary_encoded::{BytesColumn, StrColumn}; pub use serialize::{ - open_column_bytes, open_column_u128, open_column_u64, serialize_column_u128, + open_column_bytes, open_column_u128, open_column_u64, serialize_column_mappable_to_u128, serialize_column_u64, }; -pub use dictionary_encoded::{BytesColumn, StrColumn}; use crate::column_index::ColumnIndex; use crate::column_values::ColumnValues; @@ -21,23 +21,31 @@ pub struct Column { pub values: Arc>, } -use crate::column_index::Set; - impl Column { - pub fn first(&self, row_id: RowId) -> Option { + pub fn num_rows(&self) -> RowId { match &self.idx { - ColumnIndex::Full => Some(self.values.get_val(row_id)), - ColumnIndex::Optional(opt_idx) => { - let value_row_idx = opt_idx.rank_if_exists(row_id)?; - Some(self.values.get_val(value_row_idx)) - } - ColumnIndex::Multivalued(_multivalued_index) => { - todo!(); + ColumnIndex::Full => self.values.num_vals() as u32, + ColumnIndex::Optional(optional_index) => optional_index.num_rows(), + ColumnIndex::Multivalued(col_index) => { + // The multivalued index contains all value start row_id, + // and one extra value at the end with the overall number of rows. + col_index.num_vals() - 1 } } } } +impl Column { + pub fn first(&self, row_id: RowId) -> Option { + self.values(row_id).next() + } + + pub fn values(&self, row_id: RowId) -> impl Iterator + '_ { + self.value_row_ids(row_id) + .map(|value_row_id: RowId| self.values.get_val(value_row_id)) + } +} + impl Deref for Column { type Target = ColumnIndex<'static>; diff --git a/columnar/src/column/serialize.rs b/columnar/src/column/serialize.rs index 524fe6fb0e..d834b5d280 100644 --- a/columnar/src/column/serialize.rs +++ b/columnar/src/column/serialize.rs @@ -9,11 +9,11 @@ use crate::column::{BytesColumn, Column}; use crate::column_index::{serialize_column_index, SerializableColumnIndex}; use crate::column_values::serialize::serialize_column_values_u128; use crate::column_values::{ - serialize_column_values, ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64, - ALL_CODEC_TYPES, + serialize_column_values, ColumnValues, FastFieldCodecType, MonotonicallyMappableToU128, + MonotonicallyMappableToU64, }; -pub fn serialize_column_u128< +pub fn serialize_column_mappable_to_u128< F: Fn() -> I, I: Iterator, T: MonotonicallyMappableToU128, @@ -39,7 +39,14 @@ pub fn serialize_column_u64( output: &mut impl Write, ) -> io::Result<()> { let column_index_num_bytes = serialize_column_index(column_index, output)?; - serialize_column_values(column_values, &ALL_CODEC_TYPES[..], output)?; + serialize_column_values( + column_values, + &[ + FastFieldCodecType::Bitpacked, + FastFieldCodecType::BlockwiseLinear, + ], + output, + )?; output.write_all(&column_index_num_bytes.to_le_bytes())?; Ok(()) } diff --git a/columnar/src/column_index/mod.rs b/columnar/src/column_index/mod.rs index 64a7092e7a..748361bc17 100644 --- a/columnar/src/column_index/mod.rs +++ b/columnar/src/column_index/mod.rs @@ -2,6 +2,7 @@ mod multivalued_index; mod optional_index; mod serialize; +use std::ops::Range; use std::sync::Arc; pub use optional_index::{OptionalIndex, SerializableOptionalIndex, Set}; @@ -14,8 +15,12 @@ use crate::{Cardinality, RowId}; pub enum ColumnIndex<'a> { Full, Optional(OptionalIndex), - // TODO remove the Arc apart from serialization this is not - // dynamic at all. + // TODO Remove the static by fixing the codec if possible. + /// The column values enclosed contains for all row_id, + /// the value start_index. + /// + /// In addition, at index num_rows, an extra value is added + /// containing the overal number of values. Multivalued(Arc + 'a>), } @@ -28,13 +33,21 @@ impl<'a> ColumnIndex<'a> { } } - pub fn num_rows(&self) -> RowId { + pub fn value_row_ids(&self, row_id: RowId) -> Range { match self { - ColumnIndex::Full => { - todo!() + ColumnIndex::Full => row_id..row_id + 1, + ColumnIndex::Optional(optional_index) => { + if let Some(val) = optional_index.rank_if_exists(row_id) { + val..val + 1 + } else { + 0..0 + } + } + ColumnIndex::Multivalued(multivalued_index) => { + let start = multivalued_index.get_val(row_id); + let end = multivalued_index.get_val(row_id + 1); + start..end } - ColumnIndex::Optional(optional_index) => optional_index.num_rows(), - ColumnIndex::Multivalued(multivalued_index) => multivalued_index.num_vals() - 1, } } } diff --git a/columnar/src/column_index/multivalued_index.rs b/columnar/src/column_index/multivalued_index.rs index de5c5bb589..9d4dd4143c 100644 --- a/columnar/src/column_index/multivalued_index.rs +++ b/columnar/src/column_index/multivalued_index.rs @@ -11,11 +11,11 @@ use crate::RowId; pub struct MultivaluedIndex(Arc>); pub fn serialize_multivalued_index( - multivalued_index: MultivaluedIndex, + multivalued_index: &dyn ColumnValues, output: &mut impl Write, ) -> io::Result<()> { crate::column_values::serialize_column_values( - &*multivalued_index.0, + &*multivalued_index, &[FastFieldCodecType::Bitpacked, FastFieldCodecType::Linear], output, )?; @@ -23,5 +23,7 @@ pub fn serialize_multivalued_index( } pub fn open_multivalued_index(bytes: OwnedBytes) -> io::Result>> { - todo!(); + let start_index_column: Arc> = + crate::column_values::open_u64_mapped(bytes)?; + Ok(start_index_column) } diff --git a/columnar/src/column_index/serialize.rs b/columnar/src/column_index/serialize.rs index 7a4b045395..62ec6eff17 100644 --- a/columnar/src/column_index/serialize.rs +++ b/columnar/src/column_index/serialize.rs @@ -3,17 +3,18 @@ use std::io::Write; use common::{CountingWriter, OwnedBytes}; -use crate::column_index::multivalued_index::{serialize_multivalued_index, MultivaluedIndex}; +use crate::column_index::multivalued_index::serialize_multivalued_index; use crate::column_index::optional_index::serialize_optional_index; use crate::column_index::{ColumnIndex, SerializableOptionalIndex}; -use crate::Cardinality; +use crate::column_values::ColumnValues; +use crate::{Cardinality, RowId}; pub enum SerializableColumnIndex<'a> { Full, Optional(Box + 'a>), // TODO remove the Arc apart from serialization this is not // dynamic at all. - Multivalued(MultivaluedIndex), + Multivalued(Box + 'a>), } impl<'a> SerializableColumnIndex<'a> { @@ -39,7 +40,7 @@ pub fn serialize_column_index( serialize_optional_index(&*optional_index, &mut output)? } SerializableColumnIndex::Multivalued(multivalued_index) => { - serialize_multivalued_index(multivalued_index, &mut output)? + serialize_multivalued_index(&*multivalued_index, &mut output)? } } let column_index_num_bytes = output.written_bytes() as u32; diff --git a/columnar/src/column_values/mod.rs b/columnar/src/column_values/mod.rs index b0338c40c4..090c4d22ad 100644 --- a/columnar/src/column_values/mod.rs +++ b/columnar/src/column_values/mod.rs @@ -38,7 +38,9 @@ pub mod serialize; pub use self::column::{monotonic_map_column, ColumnValues, IterColumn, VecColumn}; pub use self::monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn}; pub use self::monotonic_mapping_u128::MonotonicallyMappableToU128; -pub use self::serialize::{serialize_and_load, serialize_column_values, NormalizedHeader}; +#[cfg(test)] +pub use self::serialize::serialize_and_load; +pub use self::serialize::{serialize_column_values, NormalizedHeader}; use crate::column_values::bitpacked::BitpackedCodec; use crate::column_values::blockwise_linear::BlockwiseLinearCodec; use crate::column_values::linear::LinearCodec; @@ -121,21 +123,6 @@ impl U128FastFieldCodecType { } } -/// Returns the correct codec reader wrapped in the `Arc` for the data. -// pub fn open_u128( -// bytes: OwnedBytes, -// ) -> io::Result>> { -// todo!(); -// // let (bytes, _format_version) = read_format_version(bytes)?; -// // let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?; -// // let header = U128Header::deserialize(&mut bytes)?; -// // assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace); -// // let reader = CompactSpaceDecompressor::open(bytes)?; -// // let inverted: StrictlyMonotonicMappingInverter> = -// // StrictlyMonotonicMappingToInternal::::new().into(); -// // Ok(Arc::new(monotonic_map_column(reader, inverted))) -// } - /// Returns the correct codec reader wrapped in the `Arc` for the data. pub fn open_u128_mapped( mut bytes: OwnedBytes, diff --git a/columnar/src/column_values/serialize.rs b/columnar/src/column_values/serialize.rs index d086e08f32..2952ab6406 100644 --- a/columnar/src/column_values/serialize.rs +++ b/columnar/src/column_values/serialize.rs @@ -161,28 +161,6 @@ impl BinarySerializable for Header { } } -/// Return estimated compression for given codec in the value range [0.0..1.0], where 1.0 means no -/// compression. -pub(crate) fn estimate( - typed_column: impl ColumnValues, - codec_type: FastFieldCodecType, -) -> Option { - let column = monotonic_map_column(typed_column, StrictlyMonotonicMappingToInternal::::new()); - let min_value = column.min_value(); - let gcd = super::gcd::find_gcd(column.iter().map(|val| val - min_value)) - .filter(|gcd| gcd.get() > 1u64); - let mapping = StrictlyMonotonicMappingToInternalGCDBaseval::new( - gcd.map(|gcd| gcd.get()).unwrap_or(1u64), - min_value, - ); - let normalized_column = monotonic_map_column(&column, mapping); - match codec_type { - FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&normalized_column), - FastFieldCodecType::Linear => LinearCodec::estimate(&normalized_column), - FastFieldCodecType::BlockwiseLinear => BlockwiseLinearCodec::estimate(&normalized_column), - } -} - /// Serializes u128 values with the compact space codec. pub fn serialize_column_values_u128 I, I: Iterator>( iter_gen: F, @@ -194,7 +172,6 @@ pub fn serialize_column_values_u128 I, I: Iterator>( codec_type: U128FastFieldCodecType::CompactSpace, }; header.serialize(output)?; - let compressor = CompactSpaceCompressor::train_from(iter_gen(), num_vals); compressor.compress_into(iter_gen(), output)?; diff --git a/columnar/src/columnar/merge.rs b/columnar/src/columnar/merge.rs index ba32fad748..63e242e1b4 100644 --- a/columnar/src/columnar/merge.rs +++ b/columnar/src/columnar/merge.rs @@ -15,10 +15,10 @@ pub enum MergeDocOrder { Complex(()), } -pub fn merge( - columnar_readers: &[ColumnarReader], +pub fn merge_columnar( + _columnar_readers: &[ColumnarReader], mapping: MergeDocOrder, - output: &mut impl io::Write, + _output: &mut impl io::Write, ) -> io::Result<()> { match mapping { MergeDocOrder::Stack => { diff --git a/columnar/src/columnar/mod.rs b/columnar/src/columnar/mod.rs index 4c44b8063e..d5caf6778d 100644 --- a/columnar/src/columnar/mod.rs +++ b/columnar/src/columnar/mod.rs @@ -5,5 +5,6 @@ mod reader; mod writer; pub use column_type::ColumnType; +pub use merge::{merge_columnar, MergeDocOrder}; pub use reader::ColumnarReader; pub use writer::ColumnarWriter; diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index b1cdb92be7..e4b77b1bc4 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -22,7 +22,7 @@ use crate::columnar::writer::column_writers::{ use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders}; use crate::dictionary::{DictionaryBuilder, TermIdMapping, UnorderedId}; use crate::value::{Coerce, NumericalType, NumericalValue}; -use crate::{column, Cardinality, RowId}; +use crate::{Cardinality, RowId}; /// This is a set of buffers that are used to temporarily write the values into before passing them /// to the fast field codecs. @@ -310,7 +310,7 @@ fn serialize_bytes_or_str_column( ColumnOperation::NewDoc(doc) => ColumnOperation::NewDoc(doc), } }); - serialize_column( + serialize_column_mappable_to_u64( operation_iterator, cardinality, num_docs, @@ -339,7 +339,7 @@ fn serialize_numerical_column( } = buffers; match numerical_type { NumericalType::I64 => { - serialize_column( + serialize_column_mappable_to_u64( coerce_numerical_symbol::(op_iterator), cardinality, num_docs, @@ -349,7 +349,7 @@ fn serialize_numerical_column( )?; } NumericalType::U64 => { - serialize_column( + serialize_column_mappable_to_u64( coerce_numerical_symbol::(op_iterator), cardinality, num_docs, @@ -359,7 +359,7 @@ fn serialize_numerical_column( )?; } NumericalType::F64 => { - serialize_column( + serialize_column_mappable_to_u64( coerce_numerical_symbol::(op_iterator), cardinality, num_docs, @@ -384,7 +384,7 @@ fn serialize_bool_column( bool_values, .. } = buffers; - serialize_column( + serialize_column_mappable_to_u64( column_operations_it, cardinality, num_docs, @@ -451,12 +451,11 @@ where Cardinality::Multivalued => { let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder(); consume_operation_iterator(op_iterator, multivalued_index_builder, values); - let _multivalued_index = multivalued_index_builder.finish(num_docs); - todo!(); - // SerializableColumnIndex::Multivalued(Box::new(multivalued_index)) + let multivalued_index = multivalued_index_builder.finish(num_docs); + SerializableColumnIndex::Multivalued(Box::new(multivalued_index)) } }; - crate::column::serialize_column_u128( + crate::column::serialize_column_mappable_to_u128( serializable_column_index, || values.iter().cloned(), values.len() as u32, @@ -465,7 +464,7 @@ where Ok(()) } -fn serialize_column< +fn serialize_column_mappable_to_u64< T: Copy + Default + std::fmt::Debug + Send + Sync + MonotonicallyMappableToU64 + PartialOrd, >( op_iterator: impl Iterator>, @@ -497,9 +496,8 @@ where Cardinality::Multivalued => { let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder(); consume_operation_iterator(op_iterator, multivalued_index_builder, values); - let _multivalued_index = multivalued_index_builder.finish(num_docs); - todo!(); - // SerializableColumnIndex::Multivalued(Box::new(multivalued_index)) + let multivalued_index = multivalued_index_builder.finish(num_docs); + SerializableColumnIndex::Multivalued(Box::new(multivalued_index)) } }; crate::column::serialize_column_u64( diff --git a/columnar/src/columnar/writer/serializer.rs b/columnar/src/columnar/writer/serializer.rs index 959d3850f8..47364c3fd2 100644 --- a/columnar/src/columnar/writer/serializer.rs +++ b/columnar/src/columnar/writer/serializer.rs @@ -5,7 +5,6 @@ use common::CountingWriter; use sstable::value::RangeValueWriter; use sstable::RangeSSTable; -use crate::column; use crate::columnar::ColumnType; pub struct ColumnarSerializer { diff --git a/columnar/src/columnar/writer/value_index.rs b/columnar/src/columnar/writer/value_index.rs index 71f577b3da..6d2c8de205 100644 --- a/columnar/src/columnar/writer/value_index.rs +++ b/columnar/src/columnar/writer/value_index.rs @@ -45,16 +45,6 @@ impl<'a> SerializableOptionalIndex<'a> for SingleValueArrayIndex<'a> { } } -impl OptionalIndexBuilder { - fn num_non_nulls(&self) -> u32 { - self.docs.len() as u32 - } - - fn iter(&self) -> Box + '_> { - Box::new(self.docs.iter().copied()) - } -} - impl OptionalIndexBuilder { pub fn finish<'a>(&'a mut self, num_rows: RowId) -> impl SerializableOptionalIndex + 'a { debug_assert!(self @@ -96,7 +86,7 @@ pub struct MultivaluedIndexBuilder { impl MultivaluedIndexBuilder { pub fn finish(&mut self, num_docs: RowId) -> impl ColumnValues + '_ { self.start_offsets - .resize(num_docs as usize, self.total_num_vals_seen); + .resize(num_docs as usize + 1, self.total_num_vals_seen); VecColumn { values: &&self.start_offsets[..], min_value: 0, @@ -188,7 +178,7 @@ mod tests { .finish(4u32) .iter() .collect::>(), - vec![0, 0, 2, 3] + vec![0, 0, 2, 3, 3] ); multivalued_value_index_builder.reset(); multivalued_value_index_builder.record_row(2u32); @@ -199,7 +189,7 @@ mod tests { .finish(4u32) .iter() .collect::>(), - vec![0, 0, 0, 2] + vec![0, 0, 0, 2, 2] ); } } diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 0c37a025a1..a1c13116b2 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -18,10 +18,10 @@ mod dynamic_column; pub(crate) mod utils; mod value; -pub use columnar::{ColumnarReader, ColumnarWriter}; +pub use columnar::{merge_columnar, ColumnarReader, ColumnarWriter, MergeDocOrder}; pub use value::{NumericalType, NumericalValue}; -// pub use self::dynamic_column::DynamicColumnHandle; +pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle}; pub type RowId = u32; diff --git a/columnar/src/tests.rs b/columnar/src/tests.rs index 4ae785d05d..b014dd5b0e 100644 --- a/columnar/src/tests.rs +++ b/columnar/src/tests.rs @@ -1,10 +1,10 @@ -use std::net::{IpAddr, Ipv6Addr}; +use std::net::Ipv6Addr; use crate::column_values::MonotonicallyMappableToU128; use crate::columnar::ColumnType; use crate::dynamic_column::{DynamicColumn, DynamicColumnHandle}; use crate::value::NumericalValue; -use crate::{Cardinality, ColumnarReader, ColumnarWriter, RowId}; +use crate::{Cardinality, ColumnarReader, ColumnarWriter}; #[test] fn test_dataframe_writer_str() { @@ -53,6 +53,31 @@ fn test_dataframe_writer_bool() { assert_eq!(&vals, &[None, Some(false), None, Some(true), None,]); } +#[test] +fn test_dataframe_writer_u64_multivalued() { + let mut dataframe_writer = ColumnarWriter::default(); + dataframe_writer.record_numerical(2u32, "divisor", 2u64); + dataframe_writer.record_numerical(3u32, "divisor", 3u64); + dataframe_writer.record_numerical(4u32, "divisor", 2u64); + dataframe_writer.record_numerical(5u32, "divisor", 5u64); + dataframe_writer.record_numerical(6u32, "divisor", 2u64); + dataframe_writer.record_numerical(6u32, "divisor", 3u64); + let mut buffer: Vec = Vec::new(); + dataframe_writer.serialize(7, &mut buffer).unwrap(); + let columnar = ColumnarReader::open(buffer).unwrap(); + assert_eq!(columnar.num_columns(), 1); + let cols: Vec = columnar.read_columns("divisor").unwrap(); + assert_eq!(cols.len(), 1); + assert_eq!(cols[0].num_bytes(), 43); + let dyn_i64_col = cols[0].open().unwrap(); + let DynamicColumn::I64(divisor_col) = dyn_i64_col else { panic!(); }; + assert_eq!( + divisor_col.get_cardinality(), + crate::Cardinality::Multivalued + ); + assert_eq!(divisor_col.num_rows(), 7); +} + #[test] fn test_dataframe_writer_ip_addr() { let mut dataframe_writer = ColumnarWriter::default(); @@ -125,13 +150,11 @@ fn test_dictionary_encoded_str() { let col_handles = columnar_reader.read_columns("my.column").unwrap(); assert_eq!(col_handles.len(), 1); let DynamicColumn::Str(str_col) = col_handles[0].open().unwrap() else { panic!(); }; - let index: Vec> = (0..5) - .map(|row_id| str_col.term_ords().first(row_id)) - .collect(); + let index: Vec> = (0..5).map(|row_id| str_col.ords().first(row_id)).collect(); assert_eq!(index, &[None, Some(0), None, Some(2), Some(1)]); assert_eq!(str_col.num_rows(), 5); let mut term_buffer = String::new(); - let term_ords = str_col.ordinal_dictionary(); + let term_ords = str_col.ords(); assert_eq!(term_ords.first(0), None); assert_eq!(term_ords.first(1), Some(0)); str_col.ord_to_str(0u64, &mut term_buffer).unwrap(); @@ -160,12 +183,12 @@ fn test_dictionary_encoded_bytes() { assert_eq!(col_handles.len(), 1); let DynamicColumn::Bytes(bytes_col) = col_handles[0].open().unwrap() else { panic!(); }; let index: Vec> = (0..5) - .map(|row_id| bytes_col.term_ords().first(row_id)) + .map(|row_id| bytes_col.ords().first(row_id)) .collect(); assert_eq!(index, &[None, Some(0), None, Some(2), Some(1)]); assert_eq!(bytes_col.num_rows(), 5); let mut term_buffer = Vec::new(); - let term_ords = bytes_col.term_ords(); + let term_ords = bytes_col.ords(); assert_eq!(term_ords.first(0), None); assert_eq!(term_ords.first(1), Some(0)); bytes_col