Skip to content

Commit 61b5da6

Browse files
committed
Implement to_pandas()
1 parent b8ef9bf commit 61b5da6

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

src/dataframe.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,24 @@ impl PyDataFrame {
313313
Ok(())
314314
}
315315

316+
// Convert to pandas dataframe with pyarrow
317+
// Collect the batches, pass to Arrow Table & then convert to Pandas DataFrame
318+
fn to_pandas(&self, py: Python) -> PyResult<PyObject> {
319+
let batches = self.collect(py);
320+
321+
Python::with_gil(|py| {
322+
// Instantiate pyarrow Table class and use its from_batches method
323+
let table_class = py.import("pyarrow")?.getattr("Table")?;
324+
let args = PyTuple::new(py, batches);
325+
let table: PyObject = table_class.call_method1("from_batches", args)?.into();
326+
327+
// Use Table.to_pandas() method to convert batches to pandas dataframe
328+
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas
329+
let result = table.call_method0(py, "to_pandas")?;
330+
Ok(result)
331+
})
332+
}
333+
316334
// Executes this DataFrame to get the total number of rows.
317335
fn count(&self, py: Python) -> PyResult<usize> {
318336
Ok(wait_for_future(py, self.df.as_ref().clone().count())?)

0 commit comments

Comments
 (0)