Skip to content

Commit

Permalink
Minor code cleanup (quickwit-oss#1810)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton authored and Jamie Hodkinson committed Jan 30, 2023
1 parent cfb2ec2 commit d798a1d
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 205 deletions.
2 changes: 1 addition & 1 deletion columnar/src/column/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use common::BinarySerializable;
pub use dictionary_encoded::{BytesColumn, StrColumn};
pub use serialize::{
open_column_bytes, open_column_u128, open_column_u64, serialize_column_mappable_to_u128,
serialize_column_u64,
serialize_column_mappable_to_u64,
};

use crate::column_index::ColumnIndex;
Expand Down
2 changes: 1 addition & 1 deletion columnar/src/column/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub fn serialize_column_mappable_to_u128<
Ok(())
}

pub fn serialize_column_u64<T: MonotonicallyMappableToU64>(
pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
column_index: SerializableColumnIndex<'_>,
column_values: &impl ColumnValues<T>,
output: &mut impl Write,
Expand Down
9 changes: 1 addition & 8 deletions columnar/src/column_values/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub use self::column::{monotonic_map_column, ColumnValues, IterColumn, VecColumn
pub use self::monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
pub use self::monotonic_mapping_u128::MonotonicallyMappableToU128;
#[cfg(test)]
pub use self::serialize::serialize_and_load;
pub use self::serialize::tests::serialize_and_load;
pub use self::serialize::{serialize_column_values, NormalizedHeader};
use crate::column_values::bitpacked::BitpackedCodec;
use crate::column_values::blockwise_linear::BlockwiseLinearCodec;
Expand Down Expand Up @@ -198,13 +198,6 @@ pub(crate) trait FastFieldCodec: 'static {
fn estimate(column: &dyn ColumnValues) -> Option<f32>;
}

/// The list of all available codecs for u64 convertible data.
pub const ALL_CODEC_TYPES: [FastFieldCodecType; 3] = [
FastFieldCodecType::Bitpacked,
FastFieldCodecType::BlockwiseLinear,
FastFieldCodecType::Linear,
];

#[cfg(all(test, feature = "unstable"))]
mod bench {
use std::sync::Arc;
Expand Down
36 changes: 22 additions & 14 deletions columnar/src/column_values/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@

use std::io;
use std::num::NonZeroU64;
use std::sync::Arc;

use common::{BinarySerializable, OwnedBytes, VInt};
use common::{BinarySerializable, VInt};
use log::warn;

use super::bitpacked::BitpackedCodec;
Expand All @@ -33,7 +32,7 @@ use super::monotonic_mapping::{
};
use super::{
monotonic_map_column, ColumnValues, FastFieldCodec, FastFieldCodecType,
MonotonicallyMappableToU64, U128FastFieldCodecType, VecColumn, ALL_CODEC_TYPES,
MonotonicallyMappableToU64, U128FastFieldCodecType,
};
use crate::column_values::compact_space::CompactSpaceCompressor;

Expand Down Expand Up @@ -248,20 +247,29 @@ pub(crate) fn serialize_given_codec(
Ok(())
}

/// Helper function to serialize a column (autodetect from all codecs) and then open it.
///
/// The values in `column` are wrapped in a `VecColumn`, written to an in-memory buffer
/// with `ALL_CODEC_TYPES` as the candidate codecs, and the buffer is then reopened
/// through `open_u64_mapped`.
///
/// # Panics
///
/// Panics if either the serialization or the reopening step returns an error
/// (both results are unwrapped).
pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
column: &[T],
) -> Arc<dyn ColumnValues<T>> {
let mut buffer = Vec::new();
super::serialize_column_values(&VecColumn::from(&column), &ALL_CODEC_TYPES, &mut buffer)
.unwrap();
super::open_u64_mapped(OwnedBytes::new(buffer)).unwrap()
}

#[cfg(test)]
mod tests {
pub mod tests {
use std::sync::Arc;

use common::OwnedBytes;

use super::*;
use crate::column_values::{open_u64_mapped, VecColumn};

/// Candidate codecs handed to `serialize_column_values` by the `serialize_and_load`
/// test helper.
const ALL_CODEC_TYPES: [FastFieldCodecType; 3] = [
FastFieldCodecType::Bitpacked,
FastFieldCodecType::Linear,
FastFieldCodecType::BlockwiseLinear,
];

/// Round-trips a slice of values through the column serializer.
///
/// The slice is wrapped in a `VecColumn`, serialized into an in-memory buffer with
/// `ALL_CODEC_TYPES` as the candidate codecs (autodetect from all codecs), and the
/// resulting bytes are reopened with `open_u64_mapped`.
///
/// # Panics
///
/// Panics if serialization or reopening returns an error.
pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
    column: &[T],
) -> Arc<dyn ColumnValues<T>> {
    let mut serialized = Vec::new();
    let vec_column = VecColumn::from(&column);
    serialize_column_values(&vec_column, &ALL_CODEC_TYPES, &mut serialized).unwrap();
    let owned_bytes = OwnedBytes::new(serialized);
    open_u64_mapped(owned_bytes).unwrap()
}
#[test]
fn test_serialize_deserialize_u128_header() {
let original = U128Header {
Expand Down
130 changes: 34 additions & 96 deletions columnar/src/columnar/column_type.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use crate::utils::{place_bits, select_bits};
use crate::value::NumericalType;
use crate::InvalidData;

Expand All @@ -7,113 +6,52 @@ use crate::InvalidData;
/// - bits[0..3]: Column category type.
/// - bits[3..6]: Numerical type if necessary.
#[derive(Hash, Eq, PartialEq, Debug, Clone, Copy)]
#[repr(u8)]
pub enum ColumnType {
Bytes,
Str,
Numerical(NumericalType),
Bool,
IpAddr,
I64 = 0u8,
U64 = 1u8,
F64 = 2u8,
Bytes = 3u8,
Str = 4u8,
Bool = 5u8,
IpAddr = 6u8,
}

impl ColumnType {
/// Encoded over 6 bits.
pub(crate) fn to_code(self) -> u8 {
let column_type_category;
let numerical_type_code: u8;
match self {
ColumnType::Bytes => {
column_type_category = ColumnTypeCategory::Bytes;
numerical_type_code = 0u8;
}
ColumnType::Str => {
column_type_category = ColumnTypeCategory::Str;
numerical_type_code = 0u8;
}
ColumnType::Numerical(numerical_type) => {
column_type_category = ColumnTypeCategory::Numerical;
numerical_type_code = numerical_type.to_code();
}
ColumnType::Bool => {
column_type_category = ColumnTypeCategory::Bool;
numerical_type_code = 0u8;
}
ColumnType::IpAddr => {
column_type_category = ColumnTypeCategory::IpAddr;
numerical_type_code = 0u8;
}
impl From<NumericalType> for ColumnType {
fn from(numerical_type: NumericalType) -> Self {
match numerical_type {
NumericalType::I64 => ColumnType::I64,
NumericalType::U64 => ColumnType::U64,
NumericalType::F64 => ColumnType::F64,
}
place_bits::<0, 3>(column_type_category.to_code()) | place_bits::<3, 6>(numerical_type_code)
}
}

pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
if select_bits::<6, 8>(code) != 0u8 {
return Err(InvalidData);
}
let column_type_category_code = select_bits::<0, 3>(code);
let numerical_type_code = select_bits::<3, 6>(code);
let column_type_category = ColumnTypeCategory::try_from_code(column_type_category_code)?;
match column_type_category {
ColumnTypeCategory::Bool => {
if numerical_type_code != 0u8 {
return Err(InvalidData);
}
Ok(ColumnType::Bool)
}
ColumnTypeCategory::IpAddr => {
if numerical_type_code != 0u8 {
return Err(InvalidData);
}
Ok(ColumnType::IpAddr)
}
ColumnTypeCategory::Str => {
if numerical_type_code != 0u8 {
return Err(InvalidData);
}
Ok(ColumnType::Str)
}
ColumnTypeCategory::Numerical => {
let numerical_type = NumericalType::try_from_code(numerical_type_code)?;
Ok(ColumnType::Numerical(numerical_type))
}
ColumnTypeCategory::Bytes => {
if numerical_type_code != 0u8 {
return Err(InvalidData);
}
Ok(ColumnType::Bytes)
}
impl ColumnType {
pub fn numerical_type(&self) -> Option<NumericalType> {
match self {
ColumnType::I64 => Some(NumericalType::I64),
ColumnType::U64 => Some(NumericalType::U64),
ColumnType::F64 => Some(NumericalType::F64),
ColumnType::Bytes | ColumnType::Str | ColumnType::Bool | ColumnType::IpAddr => None,
}
}
}

/// Column types are grouped into different categories that
/// correspond to the different `JsonValue` types.
///
/// The columnar writer will apply coercion rules to make sure that
/// at most one column exists per `ColumnTypeCategory`.
///
/// See also [README.md].
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
#[repr(u8)]
pub(crate) enum ColumnTypeCategory {
Bool = 0u8,
Str = 1u8,
Numerical = 2u8,
IpAddr = 3u8,
Bytes = 4u8,
}

impl ColumnTypeCategory {
pub fn to_code(self) -> u8 {
/// Encoded over 6 bits.
pub(crate) fn to_code(self) -> u8 {
self as u8
}

pub fn try_from_code(code: u8) -> Result<Self, InvalidData> {
pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
use ColumnType::*;
match code {
0u8 => Ok(Self::Bool),
1u8 => Ok(Self::Str),
2u8 => Ok(Self::Numerical),
3u8 => Ok(Self::IpAddr),
4u8 => Ok(Self::Bytes),
0u8 => Ok(I64),
1u8 => Ok(U64),
2u8 => Ok(F64),
3u8 => Ok(Bytes),
4u8 => Ok(Str),
5u8 => Ok(Bool),
6u8 => Ok(IpAddr),
_ => Err(InvalidData),
}
}
Expand All @@ -135,7 +73,7 @@ mod tests {
assert!(column_type_set.insert(column_type));
}
}
assert_eq!(column_type_set.len(), 3 + 4);
assert_eq!(column_type_set.len(), 7);
}

#[test]
Expand Down
Loading

0 comments on commit d798a1d

Please sign in to comment.