Skip to content

Commit

Permalink
Add --complete auto completion mode to sqllogictests (#4665)
Browse files Browse the repository at this point in the history
* Add `--complete` auto completion mode to `sqllogictests`

* clippy

* Apply suggestions from code review

Co-authored-by: xudong.w <wxd963996380@gmail.com>

Co-authored-by: xudong.w <wxd963996380@gmail.com>
  • Loading branch information
alamb and xudong963 authored Dec 22, 2022
1 parent c9d6118 commit 04d095d
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 93 deletions.
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ doc-comment = "0.3"
env_logger = "0.10"
parquet-test-utils = { path = "../../parquet-test-utils" }
rstest = "0.16.0"
sqllogictest = "0.9.0"
sqllogictest = "0.10.0"
sqlparser = "0.28"
test-utils = { path = "../../test-utils" }
thiserror = "1.0.37"
Expand Down
34 changes: 0 additions & 34 deletions datafusion/core/tests/sql/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1076,37 +1076,3 @@ async fn aggregate_with_alias() -> Result<()> {
);
Ok(())
}

#[tokio::test]
async fn array_agg_zero() -> Result<()> {
let ctx = SessionContext::new();
// 2 different aggregate functions: avg and sum(distinct)
let sql = "SELECT ARRAY_AGG([]);";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+------------------------+",
"| ARRAYAGG(List([NULL])) |",
"+------------------------+",
"| [] |",
"+------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
}

#[tokio::test]
async fn array_agg_one() -> Result<()> {
let ctx = SessionContext::new();
// 2 different aggregate functions: avg and sum(distinct)
let sql = "SELECT ARRAY_AGG([1]);";
let actual = execute_to_batches(&ctx, sql).await;
let expected = vec![
"+---------------------+",
"| ARRAYAGG(List([1])) |",
"+---------------------+",
"| [[1]] |",
"+---------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
}
17 changes: 16 additions & 1 deletion datafusion/core/tests/sqllogictests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@

This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files in `test_files`.

#### Running tests
#### Running tests: Validation Mode

In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the file to the output produced by that run.

Run all tests suites in validation mode

```shell
cargo test -p datafusion --test sqllogictests
Expand All @@ -40,6 +44,17 @@ Run only the tests in `information_schema.slt`:
cargo test -p datafusion --test sqllogictests -- information
```

#### Updating tests: Completion Mode

In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is is a full script that is a copy of the prototype script with result inserted.

You can update tests by passing the `--complete` argument.

```shell
# Update ddl.slt with output from running
cargo test -p datafusion --test sqllogictests -- ddl --complete
```

#### sqllogictests

> :warning: **Warning**:Datafusion's sqllogictest implementation and migration is still in progress. Definitions taken from https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
Expand Down
9 changes: 9 additions & 0 deletions datafusion/core/tests/sqllogictests/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ pub enum DFSqlLogicTestError {
/// Error from arrow-rs
#[error("Arrow error: {0}")]
Arrow(ArrowError),
/// Generic error
#[error("Other Error: {0}")]
Other(String),
}

impl From<TestError> for DFSqlLogicTestError {
Expand All @@ -63,3 +66,9 @@ impl From<ArrowError> for DFSqlLogicTestError {
DFSqlLogicTestError::Arrow(value)
}
}

impl From<String> for DFSqlLogicTestError {
fn from(value: String) -> Self {
DFSqlLogicTestError::Other(value)
}
}
141 changes: 84 additions & 57 deletions datafusion/core/tests/sqllogictests/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use log::info;
use normalize::convert_batches;
use sqllogictest::DBOutput;
use sqlparser::ast::Statement as SQLStatement;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::time::Duration;

use crate::error::{DFSqlLogicTestError, Result};
Expand Down Expand Up @@ -78,78 +78,49 @@ pub async fn main() -> Result<()> {
#[cfg(not(target_family = "windows"))]
pub async fn main() -> Result<()> {
// Enable logging (e.g. set RUST_LOG=debug to see debug logs)

use sqllogictest::{default_validator, update_test_file};
env_logger::init();

// run each file using its own new DB
//
// Note: can't use tester.run_parallel_async()
// as that will reuse the same SessionContext
//
// We could run these tests in parallel eventually if we wanted.
let options = Options::new();

// default to all files in test directory filtering based on name
let files: Vec<_> = std::fs::read_dir(TEST_DIRECTORY)
.unwrap()
.map(|path| path.unwrap().path())
.filter(|path| options.check_test_file(path.as_path()))
.collect();

let files = get_test_files();
info!("Running test files {:?}", files);

for path in files {
println!("Running: {}", path.display());

let file_name = path.file_name().unwrap().to_str().unwrap().to_string();

// Create the test runner
let ctx = context_for_test_file(&file_name).await;

let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
tester.run_file_async(path).await?;
let mut runner = sqllogictest::Runner::new(DataFusion { ctx, file_name });

// run each file using its own new DB
//
// We could run these tests in parallel eventually if we wanted.
if options.complete_mode {
info!("Using complete mode to complete {}", path.display());
let col_separator = " ";
let validator = default_validator;
update_test_file(path, runner, col_separator, validator)
.await
.map_err(|e| e.to_string())?;
} else {
// run the test normally:
runner.run_file_async(path).await?;
}
}

Ok(())
}

/// Gets a list of test files to execute. If there were arguments
/// passed to the program treat it as a cargo test filter (substring match on filenames)
fn get_test_files() -> Vec<PathBuf> {
info!("Test directory: {}", TEST_DIRECTORY);

let args: Vec<_> = std::env::args().collect();

// treat args after the first as filters to run (substring matching)
let filters = if !args.is_empty() {
args.iter()
.skip(1)
.map(|arg| arg.as_str())
.collect::<Vec<_>>()
} else {
vec![]
};

// default to all files in test directory filtering based on name
std::fs::read_dir(TEST_DIRECTORY)
.unwrap()
.map(|path| path.unwrap().path())
.filter(|path| check_test_file(&filters, path.as_path()))
.collect()
}

/// because this test can be run as a cargo test, commands like
///
/// ```shell
/// cargo test foo
/// ```
///
/// Will end up passing `foo` as a command line argument.
///
/// be compatible with this, treat the command line arguments as a
/// filter and that does a substring match on each input.
/// returns true f this path should be run
fn check_test_file(filters: &[&str], path: &Path) -> bool {
if filters.is_empty() {
return true;
}

// otherwise check if any filter matches
let path_str = path.to_string_lossy();
filters.iter().any(|filter| path_str.contains(filter))
}

/// Create a SessionContext, configured for the specific test
async fn context_for_test_file(file_name: &str) -> SessionContext {
match file_name {
Expand Down Expand Up @@ -189,3 +160,59 @@ async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DBOut
let formatted_batches = convert_batches(results)?;
Ok(formatted_batches)
}

/// Parsed command line options
struct Options {
// regex like
/// arguments passed to the program which are treated as
/// cargo test filter (substring match on filenames)
filters: Vec<String>,

/// Auto complete mode to fill out expected results
complete_mode: bool,
}

impl Options {
fn new() -> Self {
let args: Vec<_> = std::env::args().collect();

let complete_mode = args.iter().any(|a| a == "--complete");

// treat args after the first as filters to run (substring matching)
let filters = if !args.is_empty() {
args.into_iter()
.skip(1)
// ignore command line arguments like `--complete`
.filter(|arg| !arg.as_str().starts_with("--"))
.collect::<Vec<_>>()
} else {
vec![]
};

Self {
filters,
complete_mode,
}
}

/// Because this test can be run as a cargo test, commands like
///
/// ```shell
/// cargo test foo
/// ```
///
/// Will end up passing `foo` as a command line argument.
///
/// To be compatible with this, treat the command line arguments as a
/// filter and that does a substring match on each input. returns
/// true f this path should be run
fn check_test_file(&self, path: &Path) -> bool {
if self.filters.is_empty() {
return true;
}

// otherwise check if any filter matches
let path_str = path.to_string_lossy();
self.filters.iter().any(|filter| path_str.contains(filter))
}
}
14 changes: 14 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1077,3 +1077,17 @@ b 5
c 4
d 4
e 4



# array_agg_zero
query U
SELECT ARRAY_AGG([]);
----
[]

# array_agg_one
query U
SELECT ARRAY_AGG([1]);
----
[[1]]
6 changes: 6 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -411,5 +411,11 @@ SELECT * FROM aggregate_simple order by c1 LIMIT 1;
----
0.00001 0.000000000001 true

query CCC
SELECT * FROM aggregate_simple order by c1 DESC LIMIT 1;
----
0.00005 0.000000000005 true


statement ok
DROP TABLE aggregate_simple

0 comments on commit 04d095d

Please sign in to comment.