From d2e322314c786b892f4d8b37f383fae8e8827ca9 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Mon, 21 Oct 2024 11:57:30 +0800 Subject: [PATCH] [SPARK-50001][PYTHON][PS][CONNECT] Adjust "precision" to be part of kwargs for box plots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Adjust "precision" to be kwargs for box plots in both Pandas on Spark and PySpark. ### Why are the changes needed? Per discussion here (https://github.com/apache/spark/pull/48445#discussion_r1804042377), precision is Spark-specific implementation detail, so we wanted to keep “precision” as part of kwargs for box plots. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48513 from xinrong-meng/precision. Authored-by: Xinrong Meng Signed-off-by: Xinrong Meng --- python/pyspark/pandas/plot/core.py | 15 +++++++-------- python/pyspark/sql/plot/core.py | 13 +++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py index 12c17a06f153b..f5652177fe4a5 100644 --- a/python/pyspark/pandas/plot/core.py +++ b/python/pyspark/pandas/plot/core.py @@ -841,7 +841,7 @@ def barh(self, x=None, y=None, **kwargs): elif isinstance(self.data, DataFrame): return self(kind="barh", x=x, y=y, **kwargs) - def box(self, precision=0.01, **kwds): + def box(self, **kwds): """ Make a box plot of the DataFrame columns. @@ -857,12 +857,11 @@ def box(self, precision=0.01, **kwds): Parameters ---------- - precision: scalar, default = 0.01 - This argument is used by pandas-on-Spark to compute approximate statistics - for building a boxplot. Use *smaller* values to get more precise - statistics. 
- **kwds : optional - Additional keyword arguments are documented in + **kwds : dict, optional + Extra arguments. `precision` is a float that is used by + pandas-on-Spark to compute approximate statistics for building a + boxplot. The default value is 0.01. Use smaller values to get more + precise statistics. Additional keyword arguments are documented in :meth:`pyspark.pandas.Series.plot`. Returns @@ -901,7 +900,7 @@ def box(self, precision=0.01, **kwds): from pyspark.pandas import DataFrame, Series if isinstance(self.data, (Series, DataFrame)): - return self(kind="box", precision=precision, **kwds) + return self(kind="box", **kwds) def hist(self, bins=10, **kwds): """ diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py index f44c0768d4337..178411e5c5ef8 100644 --- a/python/pyspark/sql/plot/core.py +++ b/python/pyspark/sql/plot/core.py @@ -359,9 +359,7 @@ def pie(self, x: str, y: str, **kwargs: Any) -> "Figure": ) return self(kind="pie", x=x, y=y, **kwargs) - def box( - self, column: Union[str, List[str]], precision: float = 0.01, **kwargs: Any - ) -> "Figure": + def box(self, column: Union[str, List[str]], **kwargs: Any) -> "Figure": """ Make a box plot of the DataFrame columns. @@ -377,11 +375,10 @@ def box( ---------- column: str or list of str Column name or list of names to be used for creating the boxplot. - precision: float, default = 0.01 - This argument is used by pyspark to compute approximate statistics - for building a boxplot. **kwargs - Additional keyword arguments. + Extra arguments. `precision` is a float that is used by + pyspark to compute approximate statistics for building a boxplot. + The default value is 0.01. Use smaller values to get more precise statistics. 
Returns ------- @@ -404,7 +401,7 @@ def box( >>> df.plot.box(column="math_score") # doctest: +SKIP >>> df.plot.box(column=["math_score", "english_score"]) # doctest: +SKIP """ - return self(kind="box", column=column, precision=precision, **kwargs) + return self(kind="box", column=column, **kwargs) def kde( self,