Skip to content

Commit

Permalink
feat(common): support generating Datum in field generator (#3827)
Browse files Browse the repository at this point in the history
* add generate_datum to generator traits

* add MIN and MAX to OrderedF32 and OrderedF64

* make OrderedFloat serializable and expose impl_rand in common configs

* implement generate_datum

* add unit tests and fix format

* fix bug

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
wzzzzd and mergify[bot] authored Jul 13, 2022
1 parent 4b511e8 commit 0f8148d
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 19 deletions.
24 changes: 23 additions & 1 deletion src/common/src/field_generator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use serde_json::Value;
pub use timestamp::*;
pub use varchar::*;

use crate::types::DataType;
use crate::types::{DataType, Datum};

pub const DEFAULT_MIN: i16 = i16::MIN;
pub const DEFAULT_MAX: i16 = i16::MAX;
Expand All @@ -44,6 +44,8 @@ pub trait NumericFieldRandomGenerator {
Self: Sized;

fn generate(&mut self, offset: u64) -> Value;

fn generate_datum(&mut self, offset: u64) -> Datum;
}

/// Fields that can be generated as a continuous sequence implement this trait.
Expand All @@ -53,6 +55,8 @@ pub trait NumericFieldSequenceGenerator {
Self: Sized;

fn generate(&mut self) -> Value;

fn generate_datum(&mut self) -> Datum;
}

/// The way that datagen creates the field data, such as 'sequence' or 'random'.
Expand Down Expand Up @@ -173,6 +177,24 @@ impl FieldGeneratorImpl {
FieldGeneratorImpl::Timestamp(f) => f.generate(),
}
}

/// Produces the next value of this field as a `Datum` by delegating to the
/// wrapped generator's `generate_datum`.
///
/// `offset` is forwarded only to the `*Random` variants; the `*Sequence`
/// variants are called without it.
///
/// # Panics
///
/// Panics via `todo!()` for the `Varchar` and `Timestamp` variants, which do
/// not provide `generate_datum` yet.
pub fn generate_datum(&mut self, offset: u64) -> Datum {
match self {
// Sequence generators: `offset` is not passed through.
FieldGeneratorImpl::I16Sequence(f) => f.generate_datum(),
FieldGeneratorImpl::I32Sequence(f) => f.generate_datum(),
FieldGeneratorImpl::I64Sequence(f) => f.generate_datum(),
FieldGeneratorImpl::F32Sequence(f) => f.generate_datum(),
FieldGeneratorImpl::F64Sequence(f) => f.generate_datum(),
// Random generators: `offset` is handed to the generator.
FieldGeneratorImpl::I16Random(f) => f.generate_datum(offset),
FieldGeneratorImpl::I32Random(f) => f.generate_datum(offset),
FieldGeneratorImpl::I64Random(f) => f.generate_datum(offset),
FieldGeneratorImpl::F32Random(f) => f.generate_datum(offset),
FieldGeneratorImpl::F64Random(f) => f.generate_datum(offset),
// TODO: add generate_datum in VarcharField and TimestampField
// Until then, reaching either arm below panics at runtime.
FieldGeneratorImpl::Varchar(_) => todo!(),
FieldGeneratorImpl::Timestamp(_) => todo!(),
}
}
}

#[cfg(test)]
Expand Down
64 changes: 50 additions & 14 deletions src/common/src/field_generator/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use serde_json::json;

use super::{DEFAULT_END, DEFAULT_MAX, DEFAULT_MIN, DEFAULT_START};
use crate::field_generator::{NumericFieldRandomGenerator, NumericFieldSequenceGenerator};
use crate::types::{Datum, OrderedF32, OrderedF64, Scalar};

trait NumericType
where
Expand All @@ -39,18 +39,14 @@ where
{
const DEFAULT_MIN: Self;
const DEFAULT_MAX: Self;
const DEFAULT_START: Self;
const DEFAULT_END: Self;
}

macro_rules! impl_numeric_type {
($({ $random_variant_name:ident, $sequence_variant_name:ident,$field_type:ty }),*) => {
$(
impl NumericType for $field_type {
const DEFAULT_MIN: $field_type = DEFAULT_MIN as $field_type;
const DEFAULT_MAX: $field_type = DEFAULT_MAX as $field_type;
const DEFAULT_START: $field_type = DEFAULT_START as $field_type;
const DEFAULT_END: $field_type = DEFAULT_END as $field_type;
const DEFAULT_MIN: $field_type = <$field_type>::MIN;
const DEFAULT_MAX: $field_type = <$field_type>::MAX;
}
)*
};
Expand All @@ -73,7 +69,7 @@ pub struct NumericFieldSequenceConcrete<T> {

impl<T> NumericFieldRandomGenerator for NumericFieldRandomConcrete<T>
where
T: NumericType,
T: NumericType + Scalar,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
fn new(min_option: Option<String>, max_option: Option<String>, seed: u64) -> Result<Self>
Expand All @@ -89,7 +85,6 @@ where
if let Some(max_option) = max_option {
max = max_option.parse::<T>()?;
}

assert!(min < max);

Ok(Self { min, max, seed })
Expand All @@ -100,10 +95,16 @@ where
let result = rng.gen_range(self.min..=self.max);
json!(result)
}

/// Samples one value from the inclusive range `min..=max` and wraps it as a
/// `Datum`; the result is always `Some`.
///
/// A fresh `StdRng` is seeded with `offset ^ self.seed` on every call, so no
/// RNG state is carried from one call to the next.
fn generate_datum(&mut self, offset: u64) -> Datum {
    let sampled = StdRng::seed_from_u64(offset ^ self.seed).gen_range(self.min..=self.max);
    Some(sampled.to_scalar_value())
}
}
impl<T> NumericFieldSequenceGenerator for NumericFieldSequenceConcrete<T>
where
T: NumericType,
T: NumericType + Scalar,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
fn new(
Expand All @@ -115,8 +116,8 @@ where
where
Self: Sized,
{
let mut start = T::DEFAULT_START;
let mut end = T::DEFAULT_END;
let mut start = T::zero();
let mut end = T::DEFAULT_MAX;

if let Some(star_optiont) = star_option {
start = star_optiont.parse::<T>()?;
Expand Down Expand Up @@ -146,6 +147,17 @@ where
self.cur += T::one();
json!(partition_result)
}

/// Computes the next element of the sequence,
/// `start + offset + step * cur`, and advances `cur` by one.
///
/// Returns `None` when the computed element is greater than `end`, otherwise
/// the element wrapped as a `Datum`. `cur` is advanced in both cases.
fn generate_datum(&mut self) -> Datum {
    let first = self.start + T::from(self.offset).unwrap();
    let stride = T::from(self.step).unwrap();
    let value = first + stride * self.cur;
    self.cur += T::one();

    // Guard clause: anything past `end` yields no datum.
    if value > self.end {
        return None;
    }
    Some(value.to_scalar_value())
}
}

#[macro_export]
Expand All @@ -155,8 +167,8 @@ macro_rules! for_all_fields_variants {
{ I16RandomField,I16SequenceField,i16 },
{ I32RandomField,I32SequenceField,i32 },
{ I64RandomField,I64SequenceField,i64 },
{ F32RandomField,F32SequenceField,f32 },
{ F64RandomField,F64SequenceField,f64 }
{ F32RandomField,F32SequenceField,OrderedF32 },
{ F64RandomField,F64SequenceField,OrderedF64 }
}
};
}
Expand Down Expand Up @@ -203,4 +215,28 @@ mod tests {
assert!((5..=10).contains(&res));
}
}
// Checks that an f32 sequence field built from the string bounds "5.0" and
// "10.0" yields the datums 5.0, 6.0, ..., 10.0 in order.
#[test]
fn test_sequence_datum_generator() {
// NOTE(review): the trailing `0, 1` arguments are presumably the offset and
// step of the sequence — the `new` signature is not visible here, confirm.
let mut f32_field =
F32SequenceField::new(Some("5.0".to_string()), Some("10.0".to_string()), 0, 1).unwrap();

// Each call must return the next integer-valued float wrapped as a scalar.
for i in 5..=10 {
assert_eq!(
f32_field.generate_datum(),
Some(OrderedF32::from(i as f32).to_scalar_value())
);
}
}
// Checks that an i32 random field bounded by "-5" and "5" (seed 123) returns
// a non-null datum within [-5, 5] for each offset in 0..100.
#[test]
fn test_random_datum_generator() {
let mut i32_field =
I32RandomField::new(Some("-5".to_string()), Some("5".to_string()), 123).unwrap();
// Inclusive bounds expressed as scalar values so they compare with the result.
let (lower, upper) = ((-5).to_scalar_value(), 5.to_scalar_value());
for i in 0..100 {
let res = i32_field.generate_datum(i as u64);
assert!(res.is_some());
let res = res.unwrap();
assert!(lower <= res && res <= upper);
}
}
}
20 changes: 16 additions & 4 deletions src/common/src/types/ordered_float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ const CANONICAL_ZERO_BITS: u64 = 0x0u64;
/// s.insert(OrderedFloat(NAN));
/// assert!(s.contains(&OrderedFloat(NAN)));
/// ```
#[derive(Debug, Default, Clone, Copy)]
#[derive(Debug, Default, Clone, Copy, Serialize)]
#[repr(transparent)]
pub struct OrderedFloat<T>(pub T);

Expand All @@ -106,6 +106,16 @@ impl<T: Float> OrderedFloat<T> {
}
}

impl OrderedFloat<f32> {
pub const MAX: Self = Self(f32::MAX);
pub const MIN: Self = Self(f32::MIN);
}

impl OrderedFloat<f64> {
pub const MAX: Self = Self(f64::MAX);
pub const MIN: Self = Self(f64::MIN);
}

impl<T: Float> AsRef<T> for OrderedFloat<T> {
#[inline]
fn as_ref(&self) -> &T {
Expand Down Expand Up @@ -938,8 +948,6 @@ fn raw_double_bits<F: Float>(f: &F) -> u64 {
(man & MAN_MASK) | ((exp_u64 << 52) & EXP_MASK) | ((sign_u64 << 63) & SIGN_MASK)
}

// Currently we only introduce `rand` as a dev dependency.
#[cfg(test)]
mod impl_rand {
use rand::distributions::uniform::*;
use rand::distributions::{Distribution, Open01, OpenClosed01, Standard};
Expand Down Expand Up @@ -989,7 +997,10 @@ mod impl_rand {
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
UniformSampler::new(low, high)
UniformOrdered(UniformFloat::<$f>::new_inclusive(
low.borrow().0,
high.borrow().0,
))
}

fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
Expand Down Expand Up @@ -1155,6 +1166,7 @@ mod impl_into_ordered {
}

pub use impl_into_ordered::IntoOrdered;
use serde::Serialize;

#[cfg(test)]
mod tests {
Expand Down

0 comments on commit 0f8148d

Please sign in to comment.