Skip to content

Commit

Permalink
sqllogictest: A logging and command line filter (#4497)
Browse files Browse the repository at this point in the history
* sqllogictest: A logging and command line filter

* Reduce some println to info

* Be compatible with Rust test runner

* Fix typo, cargo fmt

* Add a note about substring matching
  • Loading branch information
alamb authored Dec 5, 2022
1 parent 237233f commit 2a754f8
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 21 deletions.
17 changes: 16 additions & 1 deletion datafusion/core/tests/sqllogictests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,22 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/s

#### Running tests

`cargo test -p datafusion --test sqllogictests`
```shell
cargo test -p datafusion --test sqllogictests
```

Run tests with debug logging enabled:

```shell
RUST_LOG=debug cargo test -p datafusion --test sqllogictests
```

Run only the tests in `information_schema.slt`:

```shell
# information_schema.slt matches due to substring matching `information`
cargo test -p datafusion --test sqllogictests -- information
```

#### sqllogictests

Expand Down
81 changes: 61 additions & 20 deletions datafusion/core/tests/sqllogictests/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ use datafusion::arrow::csv::WriterBuilder;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_sql::parser::{DFParser, Statement};
use log::info;
use normalize::normalize_batch;
use sqlparser::ast::Statement as SQLStatement;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::time::Duration;

use crate::error::{DFSqlLogicTestError, Result};
Expand Down Expand Up @@ -76,56 +77,96 @@ pub async fn main() -> Result<()> {
#[tokio::main]
#[cfg(not(target_family = "windows"))]
pub async fn main() -> Result<()> {
let paths = std::fs::read_dir(TEST_DIRECTORY).unwrap();
// Enable logging (e.g. set RUST_LOG=debug to see debug logs)
env_logger::init();

// run each file using its own new SessionContext
// run each file using its own new DB
//
// Note: can't use tester.run_parallel_async()
// as that will reuse the same SessionContext
//
// We could run these tests in parallel eventually if we wanted.

for path in paths {
// TODO better error handling
let path = path.unwrap().path();
let files = get_test_files();
info!("Running test files {:?}", files);

run_file(&path).await?;
for path in files {
println!("Running: {}", path.display());

let file_name = path.file_name().unwrap().to_str().unwrap().to_string();

let ctx = context_for_test_file(&file_name).await;

let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
tester.run_file_async(path).await?;
}

Ok(())
}

/// Run the tests in the specified `.slt` file
async fn run_file(path: &Path) -> Result<()> {
println!("Running: {}", path.display());

let file_name = path.file_name().unwrap().to_str().unwrap().to_string();

let ctx = context_for_test_file(&file_name).await;
/// Gets the list of test files to execute, in sorted order.
///
/// Every command line argument after the program name is treated as a
/// cargo-test-style filter: a file is selected when any filter is a
/// substring of its path (see `check_test_file`). With no filters, every
/// entry in `TEST_DIRECTORY` is selected.
///
/// # Panics
///
/// Panics if `TEST_DIRECTORY` cannot be read or if one of its entries
/// cannot be listed.
fn get_test_files() -> Vec<PathBuf> {
    info!("Test directory: {}", TEST_DIRECTORY);

    // The first argument is the program name; everything after it is a
    // filter. `skip(1)` yields nothing when there are no further arguments,
    // so no separate emptiness check is needed.
    let args: Vec<String> = std::env::args().skip(1).collect();
    let filters: Vec<&str> = args.iter().map(String::as_str).collect();

    let mut files: Vec<PathBuf> = std::fs::read_dir(TEST_DIRECTORY)
        .unwrap_or_else(|e| {
            panic!("failed to read test directory {}: {}", TEST_DIRECTORY, e)
        })
        .map(|entry| {
            entry
                .unwrap_or_else(|e| {
                    panic!("failed to list entry in {}: {}", TEST_DIRECTORY, e)
                })
                .path()
        })
        .filter(|path| check_test_file(&filters, path.as_path()))
        .collect();

    // `read_dir` returns entries in a platform- and filesystem-dependent
    // order; sort so that the run order (and the logs) are reproducible.
    files.sort();
    files
}

let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
tester.run_file_async(path).await?;
/// Returns `true` if the test file at `path` should be run.
///
/// Because this test can be run as a cargo test, commands like
///
/// ```shell
/// cargo test foo
/// ```
///
/// will end up passing `foo` as a command line argument.
///
/// To be compatible with this, the command line arguments are treated as
/// filters: a file is selected when at least one filter is a substring of
/// its path (converted lossily to a string). An empty filter list selects
/// every file.
fn check_test_file(filters: &[&str], path: &Path) -> bool {
    // No filters means "run everything". This must be checked explicitly
    // because `any` on an empty iterator returns false.
    if filters.is_empty() {
        return true;
    }

    // Otherwise the file runs if any filter matches.
    let path_str = path.to_string_lossy();
    filters.iter().any(|filter| path_str.contains(filter))
}

/// Create a SessionContext, configured for the specific test
async fn context_for_test_file(file_name: &str) -> SessionContext {
match file_name {
"aggregate.slt" => {
println!("Registering aggregate tables");
info!("Registering aggregate tables");
let ctx = SessionContext::new();
setup::register_aggregate_tables(&ctx).await;
ctx
}
"information_schema.slt" => {
println!("Enabling information schema");
info!("Enabling information schema");
SessionContext::with_config(
SessionConfig::new().with_information_schema(true),
)
}
_ => {
println!("Using default SessionContex");
info!("Using default SessionContext");
SessionContext::new()
}
}
Expand Down

0 comments on commit 2a754f8

Please sign in to comment.