Skip to content

Commit

Permalink
fix(hash): fix hash_key_data_size (#3843)
Browse files Browse the repository at this point in the history
* fix(hash): fix hash_key_data_size

* fix(hash): support Key8 for Boolean
  • Loading branch information
Li0k authored Jul 13, 2022
1 parent d0027c6 commit 4a5503e
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 34 deletions.
50 changes: 45 additions & 5 deletions src/common/src/hash/dispatcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@

use super::HashKey;
use crate::hash;
use crate::types::{DataSize, DataType};
use crate::types::DataType;

/// An enum that helps to dynamically dispatch the [`HashKey`] template.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum HashKeyKind {
Key8,
Key16,
Key32,
Key64,
Expand All @@ -31,6 +32,7 @@ impl HashKeyKind {
fn order_by_key_size() -> impl IntoIterator<Item = (HashKeyKind, usize)> {
use HashKeyKind::*;
[
(Key8, 1),
(Key16, 2),
(Key32, 4),
(Key64, 8),
Expand All @@ -40,6 +42,14 @@ impl HashKeyKind {
}
}

/// Number of bytes of one element in `HashKey` serialization of [`DataType`].
///
/// Derives the same traits as [`HashKeyKind`] so values can be copied, compared and
/// printed in assertions and logs.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum HashKeySize {
    /// For types with fixed size, e.g. int, float. The payload is the size in bytes.
    Fixed(usize),
    /// For types with variable size, e.g. string.
    Variable,
}

pub trait HashKeyDispatcher {
type Input;
type Output;
Expand All @@ -48,6 +58,7 @@ pub trait HashKeyDispatcher {

fn dispatch_by_kind(kind: HashKeyKind, input: Self::Input) -> Self::Output {
match kind {
HashKeyKind::Key8 => Self::dispatch::<hash::Key8>(input),
HashKeyKind::Key16 => Self::dispatch::<hash::Key16>(input),
HashKeyKind::Key32 => Self::dispatch::<hash::Key32>(input),
HashKeyKind::Key64 => Self::dispatch::<hash::Key64>(input),
Expand All @@ -58,6 +69,35 @@ pub trait HashKeyDispatcher {
}
}

/// Returns the [`HashKeySize`] of a single value of `data_type` inside a `HashKey`
/// serialization.
///
/// Fixed-size types report the `size_of` of the Rust type used below; `Varchar`,
/// `Struct` and `List` report [`HashKeySize::Variable`].
pub fn hash_key_size(data_type: &DataType) -> HashKeySize {
    use std::mem::size_of;

    use crate::types::{
        Decimal, IntervalUnit, NaiveDateTimeWrapper, NaiveDateWrapper, NaiveTimeWrapper,
        OrderedF32, OrderedF64,
    };

    let fixed_bytes = match data_type {
        // A `Boolean` occupies one whole byte inside a `HashKey`, even though an `Array`
        // stores it as a single bit.
        DataType::Boolean => size_of::<bool>(),
        DataType::Int16 => size_of::<i16>(),
        DataType::Int32 => size_of::<i32>(),
        DataType::Int64 => size_of::<i64>(),
        DataType::Float32 => size_of::<OrderedF32>(),
        DataType::Float64 => size_of::<OrderedF64>(),
        DataType::Decimal => size_of::<Decimal>(),
        DataType::Date => size_of::<NaiveDateWrapper>(),
        DataType::Time => size_of::<NaiveTimeWrapper>(),
        // Both timestamp flavours are sized by the same wrapper type.
        DataType::Timestamp | DataType::Timestampz => size_of::<NaiveDateTimeWrapper>(),
        DataType::Interval => size_of::<IntervalUnit>(),

        // No fixed width: bail out early with the variable-size marker.
        DataType::Varchar | DataType::Struct { .. } | DataType::List { .. } => {
            return HashKeySize::Variable;
        }
    };

    HashKeySize::Fixed(fixed_bytes)
}

pub const MAX_FIXED_SIZE_KEY_ELEMENTS: usize = 8;
/// Calculate what kind of hash key should be used given the key data types.
///
Expand All @@ -75,11 +115,11 @@ pub fn calc_hash_key_kind(data_types: &[DataType]) -> HashKeyKind {

let mut total_data_size: usize = 0;
for data_type in data_types {
match data_type.data_size() {
DataSize::Fixed(size) => {
match hash_key_size(data_type) {
HashKeySize::Fixed(size) => {
total_data_size += size;
}
DataSize::Variable => {
HashKeySize::Variable => {
return HashKeyKind::KeySerialized;
}
}
Expand Down Expand Up @@ -128,7 +168,7 @@ mod tests {

#[test]
fn test_calc_hash_key_kind() {
compare_key_kinds(&[0], HashKeyKind::KeySerialized);
compare_key_kinds(&[0], HashKeyKind::Key8);
compare_key_kinds(&[1], HashKeyKind::Key16);
compare_key_kinds(&[2], HashKeyKind::Key32);
compare_key_kinds(&[3], HashKeyKind::Key64);
Expand Down
1 change: 1 addition & 0 deletions src/common/src/hash/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ impl BuildHasher for PrecomputedBuildHasher {
}
}

// Fixed-size hash key aliases, one per supported width.
// NOTE(review): the const argument looks like the key width in bytes (the dispatcher
// pairs `Key8` with 1, `Key16` with 2, `Key32` with 4, `Key64` with 8) — confirm
// against the `FixedSizeKey` definition.
pub type Key8 = FixedSizeKey<1>;
pub type Key16 = FixedSizeKey<2>;
pub type Key32 = FixedSizeKey<4>;
pub type Key64 = FixedSizeKey<8>;
Expand Down
29 changes: 0 additions & 29 deletions src/common/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,6 @@ pub enum DataType {
const DECIMAL_DEFAULT_PRECISION: u32 = 20;
const DECIMAL_DEFAULT_SCALE: u32 = 6;

/// Number of bytes of one element in array of [`DataType`].
///
/// Derives the common value-type traits so sizes can be copied, compared and printed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum DataSize {
    /// For types with fixed size, e.g. int, float. The payload is the size in bytes.
    Fixed(usize),
    /// For types with variable size, e.g. string.
    Variable,
}

impl From<&ProstDataType> for DataType {
fn from(proto: &ProstDataType) -> DataType {
match proto.get_type_name().expect("missing type field") {
Expand Down Expand Up @@ -221,27 +213,6 @@ impl DataType {
}
}

pub fn data_size(&self) -> DataSize {
use std::mem::size_of;
match self {
DataType::Boolean => DataSize::Variable,
DataType::Int16 => DataSize::Fixed(size_of::<i16>()),
DataType::Int32 => DataSize::Fixed(size_of::<i32>()),
DataType::Int64 => DataSize::Fixed(size_of::<i64>()),
DataType::Float32 => DataSize::Fixed(size_of::<OrderedF32>()),
DataType::Float64 => DataSize::Fixed(size_of::<OrderedF64>()),
DataType::Decimal => DataSize::Fixed(size_of::<Decimal>()),
DataType::Varchar => DataSize::Variable,
DataType::Date => DataSize::Fixed(size_of::<NaiveDateWrapper>()),
DataType::Time => DataSize::Fixed(size_of::<NaiveTimeWrapper>()),
DataType::Timestamp => DataSize::Fixed(size_of::<NaiveDateTimeWrapper>()),
DataType::Timestampz => DataSize::Fixed(size_of::<NaiveDateTimeWrapper>()),
DataType::Interval => DataSize::Variable,
DataType::Struct { .. } => DataSize::Variable,
DataType::List { .. } => DataSize::Variable,
}
}

pub fn is_numeric(&self) -> bool {
matches!(
self,
Expand Down

0 comments on commit 4a5503e

Please sign in to comment.