diff --git a/superset/utils/pandas_postprocessing/boxplot.py b/superset/utils/pandas_postprocessing/boxplot.py
index 4436af9182c0f..40ce9200d358e 100644
--- a/superset/utils/pandas_postprocessing/boxplot.py
+++ b/superset/utils/pandas_postprocessing/boxplot.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 from flask_babel import gettext as _
-from pandas import DataFrame, Series
+from pandas import DataFrame, Series, to_numeric
 
 from superset.exceptions import InvalidPostProcessingError
 from superset.utils.core import PostProcessingBoxplotWhiskerType
@@ -122,4 +122,12 @@ def outliers(series: Series) -> Set[float]:
         for operator_name, operator in operators.items()
         for metric in metrics
     }
+
+    # nanpercentile needs numeric values, otherwise the isnan function
+    # that's used in the underlying function will fail. Compare against
+    # np.object_: the np.object alias was removed in NumPy 1.24.
+    for column in metrics:
+        if df.dtypes[column] == np.object_:
+            df[column] = to_numeric(df[column], errors="coerce")
+
     return aggregate(df, groupby=groupby, aggregates=aggregates)
diff --git a/tests/unit_tests/pandas_postprocessing/test_boxplot.py b/tests/unit_tests/pandas_postprocessing/test_boxplot.py
index 9252b0da78846..27dff0adeb894 100644
--- a/tests/unit_tests/pandas_postprocessing/test_boxplot.py
+++ b/tests/unit_tests/pandas_postprocessing/test_boxplot.py
@@ -124,3 +124,30 @@ def test_boxplot_percentile_incorrect_params():
             metrics=["cars"],
             percentiles=[10, 90, 10],
         )
+
+
+def test_boxplot_type_coercion():
+    """String-typed metric columns are coerced to numeric before aggregating."""
+    # Work on a copy so the shared names_df fixture is not mutated.
+    df = names_df.copy()
+    df["cars"] = df["cars"].astype(str)
+    df = boxplot(
+        df=df,
+        groupby=["region"],
+        whisker_type=PostProcessingBoxplotWhiskerType.TUKEY,
+        metrics=["cars"],
+    )
+
+    columns = set(df.columns)
+    assert columns == {
+        "cars__mean",
+        "cars__median",
+        "cars__q1",
+        "cars__q3",
+        "cars__max",
+        "cars__min",
+        "cars__count",
+        "cars__outliers",
+        "region",
+    }
+    assert len(df) == 4