From e258d7de64c7eeb3472111965854f34f06e2fab6 Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Thu, 19 Jan 2023 21:56:35 +0100 Subject: [PATCH 1/6] Implement str and repr functions for expression and dataframe --- src/dataframe.rs | 20 ++++++++++++++++++++ src/expression.rs | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/src/dataframe.rs b/src/dataframe.rs index c6162e4e4..e19880bf6 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -66,6 +66,26 @@ impl PyDataFrame { }) } + fn __str__(&self, py: Python) -> PyResult { + let df = self.df.as_ref().clone().limit(0, Some(10))?; + let batches = wait_for_future(py, df.collect())?; + let batches_as_string = pretty::pretty_format_batches(&batches); + match batches_as_string { + Ok(batch) => Ok(format!("{}", batch)), + Err(err) => Ok(format!("Error: {:?}", err.to_string())), + } + } + + fn __repr__(&self, py: Python) -> PyResult { + let df = self.df.as_ref().clone().limit(0, Some(10))?; + let batches = wait_for_future(py, df.collect())?; + let batches_as_string = pretty::pretty_format_batches(&batches); + match batches_as_string { + Ok(batch) => Ok(format!("{}", batch)), + Err(err) => Ok(format!("Error: {:?}", err.to_string())), + } + } + /// Returns the schema from the logical plan fn schema(&self) -> PyArrowType { PyArrowType(self.df.schema().into()) diff --git a/src/expression.rs b/src/expression.rs index 2e8fb801c..9f609a320 100644 --- a/src/expression.rs +++ b/src/expression.rs @@ -61,6 +61,10 @@ impl PyExpr { Ok(format!("{}", self.expr)) } + fn __repr__(&self) -> PyResult { + Ok(format!("{}", self.expr)) + } + fn __add__(&self, rhs: PyExpr) -> PyResult { Ok((self.expr.clone() + rhs.expr).into()) } From c34ec88b0ff16a3044537b3140051693927abd77 Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Thu, 19 Jan 2023 22:18:44 +0100 Subject: [PATCH 2/6] Implement __repr__ for config --- src/config.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/config.rs b/src/config.rs index d5d577765..2381f332b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -72,6 +72,14 @@ impl PyConfig { } Ok(dict.into()) } + + fn __repr__(&mut self, py: Python) -> PyResult { + let dict = self.get_all(py); + match dict { + Ok(result) => Ok(format!("{}", result)), + Err(err) => Ok(format!("Error: {:?}", err.to_string())), + } + } } /// Convert a python object to a ScalarValue From 4c75098b4e925adfec6301ce249150600182eaf4 Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Fri, 20 Jan 2023 21:33:58 +0100 Subject: [PATCH 3/6] Implement repr for catalog, context and UDFs --- datafusion/tests/test_udaf.py | 2 +- src/catalog.rs | 14 ++++++++++++++ src/context.rs | 8 ++++++++ src/udaf.rs | 4 ++++ src/udf.rs | 4 ++++ 5 files changed, 31 insertions(+), 1 deletion(-) diff --git a/datafusion/tests/test_udaf.py b/datafusion/tests/test_udaf.py index c2b29d199..4e9309cf2 100644 --- a/datafusion/tests/test_udaf.py +++ b/datafusion/tests/test_udaf.py @@ -73,7 +73,7 @@ def df(): return ctx.create_dataframe([[batch]]) -@pytest.mark.skip(reason="df.collect() will hang, need more investigations") +# @pytest.mark.skip(reason="df.collect() will hang, need more investigations") def test_errors(df): with pytest.raises(TypeError): udaf( diff --git a/src/catalog.rs b/src/catalog.rs index 900960607..08a808898 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -81,6 +81,10 @@ impl PyCatalog { ))), } } + + fn __repr__(&self) -> PyResult { + Ok(format!("Catalog(names=[{}])", self.names().join(";"))) + } } #[pymethods] @@ -97,6 +101,10 @@ impl PyDatabase { } } + fn __repr__(&self) -> PyResult { + Ok(format!("Database(names=[{}])", Vec::from_iter(self.names()).join(";"))) + } + // register_table // deregister_table } @@ -119,6 +127,12 @@ impl PyTable { } } + fn __repr__(&self) -> PyResult { + let kind = self.kind(); + Ok(format!("Table(kind={})", kind)) + + } + // fn scan // fn statistics // fn has_exact_statistics diff --git a/src/context.rs b/src/context.rs index 3990a4c37..14e34ecae 100644 --- a/src/context.rs +++ b/src/context.rs @@ -460,6 +460,14 @@ impl PySessionContext { }; Ok(PyDataFrame::new(df)) } + + fn __repr__(&self) -> PyResult { + let id = self.session_id(); + match id { + Ok(value) => Ok(format!("SessionContext(session_id={})", value)), + Err(err) => Ok(format!("Error: {:?}", err.to_string())), + } + } } impl PySessionContext { diff --git a/src/udaf.rs b/src/udaf.rs index 863d8d799..a623de6b0 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -141,4 +141,8 @@ impl PyAggregateUDF { let args = args.iter().map(|e| e.expr.clone()).collect(); Ok(self.function.call(args).into()) } + + fn __repr__(&self) -> PyResult { + Ok(format!("AggregateUDF({})", self.function.name)) + } } diff --git a/src/udf.rs b/src/udf.rs index 4804f999e..10a8782b2 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -92,4 +92,8 @@ impl PyScalarUDF { let args = args.iter().map(|e| e.expr.clone()).collect(); Ok(self.function.call(args).into()) } + + fn __repr__(&self) -> PyResult { + Ok(format!("ScalarUDF({})", self.function.name)) + } } From 800821bfa80bc4bbd7d15979a55abe1a85ab1669 Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Fri, 20 Jan 2023 22:26:43 +0100 Subject: [PATCH 4/6] Refine repr function definitions --- datafusion/tests/test_udaf.py | 2 +- src/catalog.rs | 4 ++-- src/config.rs | 2 +- src/dataframe.rs | 12 +----------- src/expression.rs | 6 +----- 5 files changed, 6 insertions(+), 20 deletions(-) diff --git a/datafusion/tests/test_udaf.py b/datafusion/tests/test_udaf.py index 4e9309cf2..c2b29d199 100644 --- a/datafusion/tests/test_udaf.py +++ b/datafusion/tests/test_udaf.py @@ -73,7 +73,7 @@ def df(): return ctx.create_dataframe([[batch]]) -# @pytest.mark.skip(reason="df.collect() will hang, need more investigations") +@pytest.mark.skip(reason="df.collect() will hang, need more investigations") def test_errors(df): with pytest.raises(TypeError): udaf( diff --git a/src/catalog.rs b/src/catalog.rs index 08a808898..34df44d19 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -83,7 +83,7 @@ impl PyCatalog { } fn __repr__(&self) -> PyResult { - Ok(format!("Catalog(names=[{}])", self.names().join(";"))) + Ok(format!("Catalog(schema_names=[{}])", self.names().join(";"))) } } @@ -102,7 +102,7 @@ impl PyDatabase { } fn __repr__(&self) -> PyResult { - Ok(format!("Database(names=[{}])", Vec::from_iter(self.names()).join(";"))) + Ok(format!("Database(table_names=[{}])", Vec::from_iter(self.names()).join(";"))) } // register_table diff --git a/src/config.rs b/src/config.rs index 2381f332b..abcd1a7fd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -76,7 +76,7 @@ impl PyConfig { fn __repr__(&mut self, py: Python) -> PyResult { let dict = self.get_all(py); match dict { - Ok(result) => Ok(format!("{}", result)), + Ok(result) => Ok(format!("Config({})", result)), Err(err) => Ok(format!("Error: {:?}", err.to_string())), } } diff --git a/src/dataframe.rs b/src/dataframe.rs index e19880bf6..423d7e55d 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -66,22 +66,12 @@ impl PyDataFrame { }) } - fn __str__(&self, py: Python) -> PyResult { - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; - let batches_as_string = pretty::pretty_format_batches(&batches); - match batches_as_string { - Ok(batch) => Ok(format!("{}", batch)), - Err(err) => Ok(format!("Error: {:?}", err.to_string())), - } - } - fn __repr__(&self, py: Python) -> PyResult { let df = self.df.as_ref().clone().limit(0, Some(10))?; let batches = wait_for_future(py, df.collect())?; let batches_as_string = pretty::pretty_format_batches(&batches); match batches_as_string { - Ok(batch) => Ok(format!("{}", batch)), + Ok(batch) => Ok(format!("DataFrame()\n{}", batch)), Err(err) => Ok(format!("Error: {:?}", err.to_string())), } } diff --git a/src/expression.rs b/src/expression.rs index 9f609a320..1eb7813ed 100644 --- a/src/expression.rs +++ b/src/expression.rs @@ -57,12 +57,8 @@ impl PyExpr { expr.into() } - fn __str__(&self) -> PyResult { - Ok(format!("{}", self.expr)) - } - fn __repr__(&self) -> PyResult { - Ok(format!("{}", self.expr)) + Ok(format!("Expr({})", self.expr)) } fn __add__(&self, rhs: PyExpr) -> PyResult { From 0ede908d2890f8cab733f2cfaa4bb81642e60cf5 Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Sun, 29 Jan 2023 21:19:10 +0100 Subject: [PATCH 5/6] Fix formatting --- src/catalog.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/catalog.rs b/src/catalog.rs index 34df44d19..f43bcae87 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -83,7 +83,10 @@ impl PyCatalog { } fn __repr__(&self) -> PyResult { - Ok(format!("Catalog(schema_names=[{}])", self.names().join(";"))) + Ok(format!( + "Catalog(schema_names=[{}])", + self.names().join(";") + )) } } @@ -102,7 +105,10 @@ impl PyDatabase { } fn __repr__(&self) -> PyResult { - Ok(format!("Database(table_names=[{}])", Vec::from_iter(self.names()).join(";"))) + Ok(format!( + "Database(table_names=[{}])", + Vec::from_iter(self.names()).join(";") + )) } // register_table @@ -130,7 +136,6 @@ impl PyTable { fn __repr__(&self) -> PyResult { let kind = self.kind(); Ok(format!("Table(kind={})", kind)) - } // fn scan From a0340e1ce90b45072d90cc1489dbd5043202a45f Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Thu, 2 Feb 2023 23:40:36 +0100 Subject: [PATCH 6/6] Apply clippy suggestions --- src/catalog.rs | 2 +- src/config.rs | 2 +- src/context.rs | 2 +- src/dataframe.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/catalog.rs b/src/catalog.rs index f43bcae87..4dd431fcb 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -135,7 +135,7 @@ impl PyTable { fn __repr__(&self) -> PyResult { let kind = self.kind(); - Ok(format!("Table(kind={})", kind)) + Ok(format!("Table(kind={kind})")) } // fn scan diff --git a/src/config.rs b/src/config.rs index abcd1a7fd..228f95a0b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -76,7 +76,7 @@ impl PyConfig { fn __repr__(&mut self, py: Python) -> PyResult { let dict = self.get_all(py); match dict { - Ok(result) => Ok(format!("Config({})", result)), + Ok(result) => Ok(format!("Config({result})")), Err(err) => Ok(format!("Error: {:?}", err.to_string())), } } diff --git a/src/context.rs b/src/context.rs index 14e34ecae..915ac4573 100644 --- a/src/context.rs +++ b/src/context.rs @@ -464,7 +464,7 @@ impl PySessionContext { fn __repr__(&self) -> PyResult { let id = self.session_id(); match id { - Ok(value) => Ok(format!("SessionContext(session_id={})", value)), + Ok(value) => Ok(format!("SessionContext(session_id={value})")), Err(err) => Ok(format!("Error: {:?}", err.to_string())), } } diff --git a/src/dataframe.rs b/src/dataframe.rs index 423d7e55d..3dd8210e0 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -71,7 +71,7 @@ impl PyDataFrame { let batches = wait_for_future(py, df.collect())?; let batches_as_string = pretty::pretty_format_batches(&batches); match batches_as_string { - Ok(batch) => Ok(format!("DataFrame()\n{}", batch)), + Ok(batch) => Ok(format!("DataFrame()\n{batch}")), Err(err) => Ok(format!("Error: {:?}", err.to_string())), } }