Skip to content

Commit

Permalink
feat(sqlsmith): generate group by (#3805)
Browse files Browse the repository at this point in the history
* implement sqlsmith group by

* fmt

* add required cols

* pass valid cols to gen_col

* gen col from valid_cols if available

* override bound columns for group by

* capture local column context

* fix case where no FROM

* fmt

* cleanup

* clean

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
kwannoel and mergify[bot] authored Jul 14, 2022
1 parent 8231cf1 commit 9b0d3a1
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 15 deletions.
9 changes: 4 additions & 5 deletions src/tests/sqlsmith/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,19 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}

/// Generates an expression of type `typ` that refers to a bound column.
///
/// Candidate columns are those whose `data_type` equals `typ`; one is picked
/// at random. When nothing is bound, or no candidate matches, this falls back
/// to `gen_simple_scalar(typ)`.
fn gen_col(&mut self, typ: DataTypeName) -> Expr {
if self.bound_relations.is_empty() {
if self.bound_columns.is_empty() {
return self.gen_simple_scalar(typ);
}
let rel = self.bound_relations.choose(&mut self.rng).unwrap();
let matched_cols = rel
.columns
let matched_cols = self
.bound_columns
.iter()
.filter(|col| col.data_type == typ)
.collect::<Vec<_>>();
if matched_cols.is_empty() {
self.gen_simple_scalar(typ)
} else {
let col_def = matched_cols.choose(&mut self.rng).unwrap();
Expr::Identifier(Ident::new(format!("{}.{}", rel.name, col_def.name)))
Expr::Identifier(Ident::new(&col_def.name))
}
}

Expand Down
40 changes: 33 additions & 7 deletions src/tests/sqlsmith/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::vec;

use itertools::Itertools;
use rand::prelude::SliceRandom;
use rand::Rng;
use risingwave_frontend::binder::bind_data_type;
Expand Down Expand Up @@ -68,17 +69,30 @@ struct SqlGenerator<'a, R: Rng> {
/// Relations bound in generated query.
/// We might not read from all tables.
bound_relations: Vec<Table>,

/// Columns bound in the generated query.
/// May be a subset of the columns of `Self::bound_relations`,
/// e.g. a GROUP BY clause restricts `bound_columns` to the grouped columns.
bound_columns: Vec<Column>,
}

/// Generators
impl<'a, R: Rng> SqlGenerator<'a, R> {
fn new(rng: &'a mut R, tables: Vec<Table>) -> Self {
SqlGenerator {
tables,
rng,
bound_relations: vec![],
bound_columns: vec![],
}
}

/// Registers `table` as bound in the query being generated.
///
/// The table's qualified columns are appended to `bound_columns`, then the
/// table itself is pushed onto `bound_relations`.
fn add_relation_to_context(&mut self, table: Table) {
    self.bound_columns.extend(table.get_qualified_columns());
    self.bound_relations.push(table);
}

fn gen_stmt(&mut self) -> Statement {
let (query, _) = self.gen_query();
Statement::Query(Box::new(query))
Expand Down Expand Up @@ -115,15 +129,14 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}

fn gen_order_by(&mut self) -> Vec<OrderByExpr> {
if self.bound_relations.is_empty() {
if self.bound_columns.is_empty() {
return vec![];
}
let mut order_by = vec![];
while self.flip_coin() {
let table = self.bound_relations.choose(&mut self.rng).unwrap();
let column = table.columns.choose(&mut self.rng).unwrap();
let column = self.bound_columns.choose(&mut self.rng).unwrap();
order_by.push(OrderByExpr {
expr: Expr::Identifier(Ident::new(format!("{}.{}", table.name, column.name))),
expr: Expr::Identifier(Ident::new(&column.name)),
asc: Some(self.rng.gen_bool(0.5)),
nulls_first: None,
})
Expand All @@ -145,14 +158,15 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
fn gen_select_stmt(&mut self) -> (Select, Vec<Column>) {
// Generate random tables/relations first so that select items can refer to them.
let from = self.gen_from();
let group_by = self.gen_group_by();
let (select_list, schema) = self.gen_select_list();
let select = Select {
distinct: false,
projection: select_list,
from,
lateral_views: vec![],
selection: self.gen_where(),
group_by: self.gen_group_by(),
group_by,
having: self.gen_having(),
};
(select, schema)
Expand Down Expand Up @@ -215,8 +229,20 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
}

fn gen_group_by(&self) -> Vec<Expr> {
vec![]
/// Picks a random, non-empty subset of the bound columns to group by.
///
/// The bound columns are shuffled and a random-length prefix (at least one
/// column) is kept. That prefix replaces `self.bound_columns`, so later reads
/// of `bound_columns` only see the grouped columns. Returns an empty list,
/// leaving the context untouched, when no columns are bound.
fn gen_group_by(&mut self) -> Vec<Expr> {
    if self.bound_columns.is_empty() {
        return vec![];
    }

    let mut candidates = self.bound_columns.clone();
    candidates.shuffle(self.rng);
    let keep = self.rng.gen_range(1..=candidates.len());
    // Keep only the first `keep` shuffled columns; the rest are dropped.
    candidates.truncate(keep);

    let group_by_exprs = candidates
        .iter()
        .map(|col| Expr::Identifier(Ident::new(&col.name)))
        .collect_vec();
    self.bound_columns = candidates;
    group_by_exprs
}

fn gen_having(&self) -> Option<Expr> {
Expand Down
6 changes: 3 additions & 3 deletions src/tests/sqlsmith/src/relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
let alias = format!("t{}", self.bound_relations.len());
let mut table = self.tables.choose(&mut self.rng).unwrap().clone();
let table_factor = TableFactor::Table {
name: ObjectName(vec![Ident::new(table.name.clone())]),
name: ObjectName(vec![Ident::new(&table.name)]),
alias: Some(TableAlias {
name: Ident::new(alias.clone()),
columns: vec![],
Expand All @@ -65,7 +65,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
};
table.name = alias; // Rename the table.
let columns = table.get_qualified_columns();
self.bound_relations.push(table);
self.add_relation_to_context(table);
(table_factor, columns)
}

Expand Down Expand Up @@ -127,7 +127,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
},
joins: vec![],
};
self.bound_relations.push(table);
self.add_relation_to_context(table);
relation
}
}

0 comments on commit 9b0d3a1

Please sign in to comment.