Skip to content

Commit

Permalink
feat(fuzz): add insert target (#3499)
Browse files Browse the repository at this point in the history
* fix(common-time): allow building nanos timestamp from parts split from i64::MIN

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(fuzz): add insert target

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: cleanup cargo.toml and polish comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
  • Loading branch information
zhongzc authored Mar 13, 2024
1 parent fb4da05 commit b55905c
Show file tree
Hide file tree
Showing 8 changed files with 360 additions and 46 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 8 additions & 7 deletions tests-fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ common-macro = { workspace = true }
common-query = { workspace = true }
common-runtime = { workspace = true }
common-telemetry = { workspace = true }
common-time = { workspace = true }
datatypes = { workspace = true }
derive_builder = { workspace = true }
dotenv = "0.15"
Expand All @@ -39,13 +40,6 @@ sqlx = { version = "0.6", features = [
] }

[dev-dependencies]
dotenv = "0.15"
sqlx = { version = "0.6", features = [
"runtime-tokio-rustls",
"mysql",
"postgres",
"chrono",
] }
tokio = { workspace = true }

[[bin]]
Expand All @@ -54,3 +48,10 @@ path = "targets/fuzz_create_table.rs"
test = false
bench = false
doc = false

[[bin]]
name = "fuzz_insert"
path = "targets/fuzz_insert.rs"
test = false
bench = false
doc = false
61 changes: 48 additions & 13 deletions tests-fuzz/src/generator/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::marker::PhantomData;

use datatypes::value::Value;
use derive_builder::Builder;
use rand::seq::SliceRandom;
use rand::Rng;
Expand All @@ -22,7 +23,7 @@ use crate::context::TableContextRef;
use crate::error::{Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{Generator, Random};
use crate::ir::insert_expr::InsertIntoExpr;
use crate::ir::insert_expr::{InsertIntoExpr, RowValue};
use crate::ir::{generate_random_value, Ident};

/// Generates [InsertIntoExpr].
Expand All @@ -41,30 +42,64 @@ pub struct InsertExprGenerator<R: Rng + 'static> {
impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
type Error = Error;

/// Generates the [CreateTableExpr].
/// Generates the [InsertIntoExpr].
fn generate(&self, rng: &mut R) -> Result<InsertIntoExpr> {
let mut columns = self.table_ctx.columns.clone();
columns.shuffle(rng);
// Whether to omit all columns, i.e. INSERT INTO table_name VALUES (...)
let omit_column_list = rng.gen_bool(0.2);

let mut rows = Vec::with_capacity(self.rows);
let mut values_columns = vec![];
if omit_column_list {
// If omit column list, then all columns are required in the values list
values_columns = self.table_ctx.columns.clone();
} else {
for column in &self.table_ctx.columns {
let can_omit = column.is_nullable() || column.has_default_value();

// 50% chance to omit a column if it's not required
if !can_omit || rng.gen_bool(0.5) {
values_columns.push(column.clone());
}
}
values_columns.shuffle(rng);

// If all columns are omitted, pick a random column
if values_columns.is_empty() {
values_columns.push(self.table_ctx.columns.choose(rng).unwrap().clone());
}
}

let mut values_list = Vec::with_capacity(self.rows);
for _ in 0..self.rows {
let mut row = Vec::with_capacity(columns.len());
for column in &columns {
// TODO(weny): generates the special cases
row.push(generate_random_value(
let mut row = Vec::with_capacity(values_columns.len());
for column in &values_columns {
if column.is_nullable() && rng.gen_bool(0.2) {
row.push(RowValue::Value(Value::Null));
continue;
}

if column.has_default_value() && rng.gen_bool(0.2) {
row.push(RowValue::Default);
continue;
}

row.push(RowValue::Value(generate_random_value(
rng,
&column.column_type,
Some(self.word_generator.as_ref()),
));
)));
}

rows.push(row);
values_list.push(row);
}

Ok(InsertIntoExpr {
table_name: self.table_ctx.name.to_string(),
columns,
rows,
columns: if omit_column_list {
vec![]
} else {
values_columns
},
values_list,
})
}
}
75 changes: 72 additions & 3 deletions tests-fuzz/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ pub(crate) mod select_expr;
use core::fmt;

pub use alter_expr::AlterTableExpr;
use common_time::{Date, DateTime, Timestamp};
pub use create_expr::CreateTableExpr;
use datatypes::data_type::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use derive_builder::Builder;
pub use insert_expr::InsertIntoExpr;
use lazy_static::lazy_static;
use rand::seq::SliceRandom;
use rand::Rng;
Expand Down Expand Up @@ -91,14 +94,62 @@ pub fn generate_random_value<R: Rng>(
Some(random) => Value::from(random.gen(rng).value),
None => Value::from(rng.gen::<char>().to_string()),
},
ConcreteDataType::Date(_) => Value::from(rng.gen::<i32>()),
ConcreteDataType::DateTime(_) => Value::from(rng.gen::<i64>()),
&ConcreteDataType::Timestamp(_) => Value::from(rng.gen::<u64>()),
ConcreteDataType::Date(_) => generate_random_date(rng),
ConcreteDataType::DateTime(_) => generate_random_datetime(rng),
&ConcreteDataType::Timestamp(ts_type) => generate_random_timestamp(rng, ts_type),

_ => unimplemented!("unsupported type: {datatype}"),
}
}

fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
let v = match ts_type {
TimestampType::Second(_) => {
let min = i64::from(Timestamp::MIN_SECOND);
let max = i64::from(Timestamp::MAX_SECOND);
let value = rng.gen_range(min..=max);
Timestamp::new_second(value)
}
TimestampType::Millisecond(_) => {
let min = i64::from(Timestamp::MIN_MILLISECOND);
let max = i64::from(Timestamp::MAX_MILLISECOND);
let value = rng.gen_range(min..=max);
Timestamp::new_millisecond(value)
}
TimestampType::Microsecond(_) => {
let min = i64::from(Timestamp::MIN_MICROSECOND);
let max = i64::from(Timestamp::MAX_MICROSECOND);
let value = rng.gen_range(min..=max);
Timestamp::new_microsecond(value)
}
TimestampType::Nanosecond(_) => {
let min = i64::from(Timestamp::MIN_NANOSECOND);
let max = i64::from(Timestamp::MAX_NANOSECOND);
let value = rng.gen_range(min..=max);
Timestamp::new_nanosecond(value)
}
};
Value::from(v)
}

/// Generates a random [DateTime] value.
///
/// A millisecond count is drawn from
/// `Timestamp::MIN_MILLISECOND..=Timestamp::MAX_MILLISECOND`, turned into a
/// chrono datetime and then into a [DateTime].
fn generate_random_datetime<R: Rng>(rng: &mut R) -> Value {
    let lower = i64::from(Timestamp::MIN_MILLISECOND);
    let upper = i64::from(Timestamp::MAX_MILLISECOND);
    let millis = rng.gen_range(lower..=upper);

    // NOTE(review): panics if the conversion fails; presumably the millisecond
    // bounds are chosen so that every value in range converts -- confirm.
    let naive = Timestamp::new_millisecond(millis)
        .to_chrono_datetime()
        .unwrap();

    Value::from(DateTime::from(naive))
}

/// Generates a random [Date] value.
///
/// A millisecond count is drawn from
/// `Timestamp::MIN_MILLISECOND..=Timestamp::MAX_MILLISECOND`, turned into a
/// chrono date and then into a [Date].
fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
    let lower = i64::from(Timestamp::MIN_MILLISECOND);
    let upper = i64::from(Timestamp::MAX_MILLISECOND);
    let millis = rng.gen_range(lower..=upper);

    // NOTE(review): panics if the conversion fails; presumably the millisecond
    // bounds are chosen so that every value in range converts -- confirm.
    let naive = Timestamp::new_millisecond(millis).to_chrono_date().unwrap();

    Value::from(Date::from(naive))
}

/// An identifier.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct Ident {
Expand Down Expand Up @@ -181,6 +232,24 @@ impl Column {
.iter()
.any(|opt| opt == &ColumnOption::PrimaryKey)
}

/// Returns true if the column may hold NULL.
///
/// A column counts as nullable unless one of its options is
/// [ColumnOption::NotNull] or [ColumnOption::TimeIndex].
pub fn is_nullable(&self) -> bool {
    self.options
        .iter()
        .all(|opt| !matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
}

/// Returns true if the column has a default value.
///
/// That is the case when any of its options is
/// [ColumnOption::DefaultValue] or [ColumnOption::DefaultFn].
pub fn has_default_value(&self) -> bool {
    for opt in self.options.iter() {
        if matches!(
            opt,
            ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
        ) {
            return true;
        }
    }
    false
}
}

/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.
Expand Down
29 changes: 26 additions & 3 deletions tests-fuzz/src/ir/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::Display;

use datatypes::value::Value;

use crate::ir::Column;

pub type RowValue = Vec<Value>;

pub struct InsertIntoExpr {
pub table_name: String,
pub columns: Vec<Column>,
pub rows: Vec<RowValue>,
pub values_list: Vec<RowValues>,
}

pub type RowValues = Vec<RowValue>;

/// A single cell in the `VALUES` list of an insert expression.
pub enum RowValue {
/// A concrete value; `Value::Null` is rendered as `NULL`.
Value(Value),
/// No explicit value; rendered as the `DEFAULT` keyword.
Default,
}

/// Renders a [RowValue] as the text placed inside a `VALUES (...)` tuple.
impl Display for RowValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value = match self {
            RowValue::Default => return write!(f, "DEFAULT"),
            RowValue::Value(value) => value,
        };

        match value {
            Value::Null => write!(f, "NULL"),
            // String-like and temporal values are wrapped in single quotes.
            // NOTE(review): the text is embedded as-is, without escaping;
            // presumably generated values never contain a quote -- confirm.
            Value::String(_) | Value::Timestamp(_) | Value::DateTime(_) | Value::Date(_) => {
                write!(f, "'{}'", value)
            }
            // Everything else is written bare.
            _ => write!(f, "{}", value),
        }
    }
}
60 changes: 41 additions & 19 deletions tests-fuzz/src/translator/mysql/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,42 @@ impl DslTranslator<InsertIntoExpr, String> for InsertIntoExprTranslator {
type Error = Error;

fn translate(&self, input: &InsertIntoExpr) -> Result<String> {
let columns = input
.columns
.iter()
.map(|c| c.name.to_string())
.collect::<Vec<_>>()
.join(", ")
.to_string();

Ok(format!(
"INSERT INTO {} ({})\nVALUES\n{};",
"INSERT INTO {} {} VALUES\n{};",
input.table_name,
columns,
Self::format_columns(input),
Self::format_values(input)
))
}
}

impl InsertIntoExprTranslator {
/// Formats the optional column list of the statement.
///
/// Returns an empty string when `input.columns` is empty, otherwise the
/// column names joined by `", "` and wrapped in parentheses.
fn format_columns(input: &InsertIntoExpr) -> String {
    if input.columns.is_empty() {
        return String::new();
    }

    let names: Vec<String> = input
        .columns
        .iter()
        .map(|column| column.name.to_string())
        .collect();

    format!("({})", names.join(", "))
}

fn format_values(input: &InsertIntoExpr) -> String {
input
.rows
.values_list
.iter()
.map(|row| {
.map(|value| {
format!(
"({})",
row.iter()
.map(|v| format!("'{v}'"))
value
.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(", ")
)
Expand All @@ -64,7 +73,7 @@ mod tests {

use rand::SeedableRng;

use super::InsertIntoExprTranslator;
use super::*;
use crate::generator::insert_expr::InsertExprGeneratorBuilder;
use crate::generator::Generator;
use crate::test_utils;
Expand All @@ -84,10 +93,23 @@ mod tests {
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();

let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test (host, idc, memory_util, ts, cpu_util, disk_util)
VALUES
('adipisci', 'debitis', '0.5495312687894465', '15292064470292927036', '0.9354265029131291', '0.8037816422279636'),
('ut', 'sequi', '0.8807117723618908', '14214208091261382505', '0.5240550121500691', '0.350785883750684');"#;
let expected = r#"INSERT INTO test (ts, host, cpu_util) VALUES
('+199601-11-07 21:32:56.695+0000', 'corrupti', 0.051130243193075464),
('+40822-03-25 02:17:34.328+0000', NULL, 0.6552502332327004);"#;
assert_eq!(output, expected);

let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test (cpu_util, disk_util, ts) VALUES
(0.7074194466620976, 0.661288102315126, '-47252-05-08 07:33:49.567+0000'),
(0.8266101224213618, 0.7947724277743285, '-224292-12-07 02:51:53.371+0000');"#;
assert_eq!(output, expected);

let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test VALUES
('odio', NULL, 0.48809950435391647, 0.5228925709595407, 0.9091528874275897, '+241156-12-16 20:52:15.185+0000'),
('dignissimos', 'labore', NULL, 0.12983559048685023, 0.6362040919831425, '-30691-06-17 23:41:09.938+0000');"#;
assert_eq!(output, expected);
}
}
2 changes: 1 addition & 1 deletion tests-fuzz/src/validator/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ impl PartialEq<Column> for ColumnEntry {
.iter()
.any(|opt| matches!(opt, ColumnOption::NotNull))
{
debug!("ColumnOption::NotNull is not found");
debug!("ColumnOption::NotNull is found");
return false;
}
} else {
Expand Down
Loading

0 comments on commit b55905c

Please sign in to comment.