Skip to content

Commit

Permalink
test case for #1374
Browse files Browse the repository at this point in the history
The test table `issue_1374` was created by hand with 2 data files. For only
one of those files, the `min_values` statistics in the `checkpoint.parquet`
file are set to null, which is what triggers the bug.
The table has no significance other than to demonstrate
issue #1374.

```
internal error: entered unreachable code
thread 'test_issue_1374' panicked at 'internal error: entered unreachable code', /Users/cole/.cargo/registry/src/index.crates.io-6f17d22bba15001f/datafusion-common-24.0.0/src/scalar.rs:2472:26
```
  • Loading branch information
cmackenzie1 committed May 30, 2023
1 parent df98587 commit 24ddc5b
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"protocol":{"minReaderVersion":1,"minWriterVersion":1}}
{"metaData":{"id":"d5ad9276-c21f-474e-bfa8-996099dce265","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"temperature\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date"],"createdTime":1684886484991,"configuration":{}}}
{"commitInfo":{"timestamp":1684886484992,"operation":"CREATE TABLE","operationParameters":{"mode":"ErrorIfExists","metadata":"{\"configuration\":{},\"created_time\":1684886484991,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"d5ad9276-c21f-474e-bfa8-996099dce265\",\"name\":null,\"partition_columns\":[\"date\"],\"schema\":{\"fields\":[{\"metadata\":{},\"name\":\"timestamp\",\"nullable\":true,\"type\":\"timestamp\"},{\"metadata\":{},\"name\":\"temperature\",\"nullable\":true,\"type\":\"integer\"},{\"metadata\":{},\"name\":\"date\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}","protocol":"{\"minReaderVersion\":1,\"minWriterVersion\":1}","location":"file:///Users/cole/github.com/cmackenzie1/delta-rs/rust/tests/data/issue_1374"},"clientVersion":"delta-rs.0.11.0"}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"add":{"path":"date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet","size":1021,"partitionValues":{"date":"2023-05-24"},"modificationTime":1684886485017,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"timestamp\":null,\"temperature\":8},\"maxValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":90},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}}
{"add":{"path":"date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet","size":1021,"partitionValues":{"date":"2023-05-24"},"modificationTime":1684886485017,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":8},\"maxValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":90},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}}
{"commitInfo":{"timestamp":1685483647338,"clientVersion":"delta-rs.0.11.0"}}
1 change: 1 addition & 0 deletions rust/tests/data/issue_1374/_delta_log/_last_checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"parts":null,"size":20622,"version":1}
Binary file not shown.
Binary file not shown.
35 changes: 35 additions & 0 deletions rust/tests/datafusion_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,3 +873,38 @@ async fn test_issue_1291_datafusion_sql_partitioned_data() -> Result<()> {

Ok(())
}

/// Regression test for issue #1374.
///
/// Opens the `./tests/data/issue_1374` table, registers it with a fresh
/// DataFusion session under the name `t`, runs a `BETWEEN` filter on the
/// `timestamp` column, and compares the collected batches against a
/// pretty-printed table.
#[tokio::test]
async fn test_issue_1374() -> Result<()> {
    let session = SessionContext::new();
    let delta_table = deltalake::open_table("./tests/data/issue_1374")
        .await
        .unwrap();
    session.register_table("t", Arc::new(delta_table))?;

    // The continuation lines of this raw string are deliberately flush-left:
    // every byte between the delimiters is part of the SQL text.
    let query = r#"SELECT *
FROM t
WHERE timestamp BETWEEN '2023-05-24T00:00:00.000Z' AND '2023-05-25T00:00:00.000Z'
LIMIT 5
"#;

    // Plan the query first, then execute it and gather every record batch.
    let frame = session.sql(query).await?;
    let batches = frame.collect().await?;

    // NOTE(review): the rows below are dated 2023-05-17..19, which lies
    // outside the 2023-05-24..25 range filtered on above — this looks like a
    // placeholder; confirm the real expected output against the table data.
    let want = [
        "+---------------------+-------------+------------+",
        "| timestamp | temperature | date |",
        "+---------------------+-------------+------------+",
        "| 2023-05-17T17:00:00 | 20 | 2023-05-17 |",
        "| 2023-05-18T18:00:00 | 20 | 2023-05-18 |",
        "| 2023-05-19T19:00:00 | 20 | 2023-05-19 |",
        "+---------------------+-------------+------------+",
    ];

    assert_batches_sorted_eq!(&want, &batches);

    Ok(())
}

0 comments on commit 24ddc5b

Please sign in to comment.