From efa5628a6f4b471a4efcb202d0b9e07aa6951677 Mon Sep 17 00:00:00 2001 From: francis-du Date: Thu, 13 Oct 2022 00:33:19 +0800 Subject: [PATCH 1/2] add write_csv/write_parquet/write_json for DataFrame --- datafusion/tests/test_dataframe.py | 2 +- src/dataframe.rs | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index 9880b6d3..f7dc7390 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -380,4 +380,4 @@ def test_union_distinct(ctx): df_a_u_b = df_a.union(df_b, True).sort(column("a").sort(ascending=True)) assert df_c.collect() == df_a_u_b.collect() - assert df_c.collect() == df_a_u_b.collect() + assert df_c.collect() == df_a_u_b.collect() \ No newline at end of file diff --git a/src/dataframe.rs b/src/dataframe.rs index c992d105..6a5d1f57 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -234,4 +234,22 @@ impl PyDataFrame { let new_df = self.df.except(py_df.df)?; Ok(Self::new(new_df)) } + + /// Write a `DataFrame` to a CSV file. + fn write_csv(&self, path: &str, py: Python) -> PyResult<()> { + wait_for_future(py, self.df.write_csv(path))?; + Ok(()) + } + + /// Write a `DataFrame` to a Parquet file. + fn write_parquet(&self, path: &str, py: Python) -> PyResult<()> { + wait_for_future(py, self.df.write_parquet(path, None))?; + Ok(()) + } + + /// Executes a query and writes the results to a partitioned JSON file. + fn write_json(&self, path: &str, py: Python) -> PyResult<()> { + wait_for_future(py, self.df.write_json(path))?; + Ok(()) + } } From d27c45149e8fed360ca7c76f56c1616f1486969f Mon Sep 17 00:00:00 2001 From: francis-du Date: Thu, 13 Oct 2022 00:35:41 +0800 Subject: [PATCH 2/2] fix: test issue --- datafusion/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py index f7dc7390..9880b6d3 100644 --- a/datafusion/tests/test_dataframe.py +++ b/datafusion/tests/test_dataframe.py @@ -380,4 +380,4 @@ def test_union_distinct(ctx): df_a_u_b = df_a.union(df_b, True).sort(column("a").sort(ascending=True)) assert df_c.collect() == df_a_u_b.collect() - assert df_c.collect() == df_a_u_b.collect() \ No newline at end of file + assert df_c.collect() == df_a_u_b.collect()