diff --git a/crates/core/src/operations/write.rs b/crates/core/src/operations/write.rs
index f4c6f36cf3..0606707c19 100644
--- a/crates/core/src/operations/write.rs
+++ b/crates/core/src/operations/write.rs
@@ -666,7 +666,13 @@ impl std::future::IntoFuture for WriteBuilder {
                             try_cast_batch(schema.fields(), table_schema.fields())
                         {
                             schema_drift = true;
-                            if this.mode == SaveMode::Overwrite && this.schema_mode.is_some() {
+                            if this.mode == SaveMode::Overwrite
+                                && this.schema_mode == Some(SchemaMode::Merge)
+                            {
+                                new_schema =
+                                    Some(merge_schema(table_schema.clone(), schema.clone())?);
+                            } else if this.mode == SaveMode::Overwrite && this.schema_mode.is_some()
+                            {
                                 new_schema = None // we overwrite anyway, so no need to cast
                             } else if this.schema_mode == Some(SchemaMode::Merge) {
                                 new_schema =
diff --git a/python/tests/test_writer.py b/python/tests/test_writer.py
index 169db15e80..47eac4b22b 100644
--- a/python/tests/test_writer.py
+++ b/python/tests/test_writer.py
@@ -304,6 +304,33 @@ def test_update_schema_rust_writer_invalid(existing_table: DeltaTable):
     assert existing_table.schema().to_pyarrow() == new_data.schema
 
 
+def test_merge_schema_rust_writer_with_overwrite(tmp_path: pathlib.Path):
+    """Overwrite with schema_mode="merge" keeps columns missing from new data."""
+    # Seed the table with three columns using the rust engine.
+    initial = pa.table(
+        {
+            "a": pa.array([1, 2, 3, 4]),
+            "b": pa.array([1, 1, 2, 2]),
+            "c": pa.array([10, 11, 12, 13]),
+        }
+    )
+    write_deltalake(tmp_path, initial, engine="rust")
+
+    # The replacement batch deliberately omits column "c".
+    replacement = pa.table({"a": pa.array([100, 200, 300]), "b": pa.array([1, 1, 1])})
+    write_deltalake(
+        tmp_path,
+        replacement,
+        mode="overwrite",
+        schema_mode="merge",
+        engine="rust",
+    )
+
+    # The merged schema must still expose "c" alongside "a" and "b".
+    columns = DeltaTable(tmp_path).to_pyarrow_table().column_names
+    assert set(columns) == {"a", "b", "c"}
+
+
 @pytest.mark.parametrize("engine", ["pyarrow", "rust"])
 def test_local_path(
     tmp_path: pathlib.Path,