Skip to content

Commit

Permalink
Introduce a failing test for delta-io#1286
Browse files Browse the repository at this point in the history
This test currently fails because the RecordBatchWriter rejects a batch whose
timestamp column is Timestamp(Nanosecond) when the table schema expects Timestamp(Microsecond):

---- writer::record_batch::tests::test_write_batch_with_timestamps stdout ----
thread 'writer::record_batch::tests::test_write_batch_with_timestamps' panicked at 'called `Result::unwrap()` on an `Err` value: InvalidArgumentError("column types must match schema types, expected Timestamp(Microsecond, None) but found Timestamp(Nanosecond, None) at column index 1")', rust/src/writer/record_batch.rs:507:101
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
  • Loading branch information
rtyler committed Apr 14, 2023
1 parent 2e0d72e commit e1b2b64
Showing 1 changed file with 44 additions and 1 deletion.
45 changes: 44 additions & 1 deletion rust/src/writer/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ pub(crate) fn divide_by_partition_values(
mod tests {
use super::*;
use crate::writer::{
test_utils::{create_initialized_table, get_record_batch},
test_utils::{create_initialized_table, create_initialized_table_with, get_record_batch},
utils::PartitionPath,
};
use std::path::Path;
Expand Down Expand Up @@ -477,6 +477,49 @@ mod tests {
assert_eq!(adds.len(), 1);
}

/*
* This is a test case to address:
* <https://github.com/delta-io/delta-rs/issues/1286>
*/
#[tokio::test]
async fn test_write_batch_with_timestamps() {
use crate::{SchemaDataType, SchemaField};
use arrow::array::*;
use arrow::datatypes::{Field, TimeUnit, DataType as ArrowDataType};

let schema = Schema::new(vec![
SchemaField::new(
"id".to_string(),
SchemaDataType::primitive("string".to_string()),
true,
HashMap::new(),
),
SchemaField::new(
"timestamp".to_string(),
SchemaDataType::primitive("timestamp".to_string()),
true,
HashMap::new(),
),
]);

let batch_schema = Arc::new(ArrowSchema::new(vec![
Field::new("id", ArrowDataType::Utf8, true),
Field::new("timestamp", ArrowDataType::Timestamp(TimeUnit::Nanosecond, None), true),
]));

let table = create_initialized_table_with(schema, &vec![]).await;

let id_values = Arc::new(StringArray::from(vec![Some("Hi")]));
let timestamp_values = Arc::new(TimestampNanosecondArray::from(vec![1]));
let batch = RecordBatch::try_new(batch_schema, vec![id_values, timestamp_values])
.unwrap();
let mut writer = RecordBatchWriter::for_table(&table).unwrap();

writer.write(batch).await.unwrap();
let adds = writer.flush().await.unwrap();
assert_eq!(adds.len(), 1);
}

#[tokio::test]
async fn test_write_multiple_partitions() {
let batch = get_record_batch(None, false);
Expand Down

0 comments on commit e1b2b64

Please sign in to comment.