Skip to content

Commit

Permalink
Adds logic for NULL values
Browse files — browse the repository at this point in the history
  • Loading branch information
michael-s-molina committed Jun 5, 2024
1 parent 7a664af commit 687f159
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { PostProcessingFactory } from './types';
export const histogramOperator: PostProcessingFactory<
PostProcessingHistogram
> = (formData, queryObject) => {
const { bins, column, cumulative, groupby, normalize } = formData;
const { bins, column, cumulative, groupby = [], normalize } = formData;
const parsedBins = Number.isNaN(Number(bins)) ? 5 : Number(bins);
const parsedColumn = getColumnLabel(column);
const parsedGroupBy = groupby!.map(getColumnLabel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ export default function buildQuery(formData: HistogramFormData) {
return buildQueryContext(formData, baseQueryObject => [
{
...baseQueryObject,
extras: { where: `${column} IS NOT NULL` },
columns: [...groupby, column],
post_processing: [histogramOperator(formData, baseQueryObject)],
metrics: undefined,
},
]);
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,10 @@ export default function transformProps(
const echartOptions: EChartsOption = {
grid: {
...defaultGrid,
bottom: 30,
left: 30,
right: 30,
left: '5%',
right: '5%',
top: '10%',
bottom: '10%',
},
xAxis: {
data: xAxisData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ const ColumnSelectPopoverTrigger = ({
isTemporal,
onColumnEdit,
popoverLabel,
disabledTabs,
],
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ function DndColumnSelect(props: DndColumnSelectProps) {
closePopover={closePopover}
visible={newColumnPopoverVisible}
isTemporal={isTemporal}
disabledTabs={disabledTabs}
>
<div />
</ColumnSelectPopoverTrigger>
Expand Down
6 changes: 5 additions & 1 deletion superset/migrations/shared/migrate_viz/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ class MigrateHistogramChart(MigrateViz):
"y_axis_label": "y_axis_title",
"normalized": "normalize",
}
remove_keys = {"all_columns_x", "link_length"}
remove_keys = {"all_columns_x", "link_length", "queryFields"}

def _pre_action(self) -> None:
all_columns_x = self.data.get("all_columns_x")
Expand All @@ -299,3 +299,7 @@ def _pre_action(self) -> None:

link_length = self.data.get("link_length")
self.data["bins"] = int(link_length) if link_length else 5

groupby = self.data.get("groupby")
if not groupby:
self.data["groupby"] = []
4 changes: 2 additions & 2 deletions superset/utils/pandas_postprocessing/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def histogram(
raise ValueError(f"The column '{column}' must be numeric.")

# calculate the histogram bin edges
bin_edges = np.histogram_bin_edges(df[column], bins=bins)
bin_edges = np.histogram_bin_edges(df[column].dropna(), bins=bins)

# convert the bin edges to strings
bin_edges_str = [
Expand All @@ -62,7 +62,7 @@ def histogram(
]

def hist_values(series: Series) -> np.ndarray:
result = np.histogram(series, bins=bin_edges)[0]
result = np.histogram(series.dropna(), bins=bin_edges)[0]
return result if not cumulative else np.cumsum(result)

if len(groupby) == 0:
Expand Down
22 changes: 22 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,25 @@ def test_histogram_with_non_numeric_column():
histogram(data, "b", ["group"], bins)
except ValueError as e:
assert str(e) == "The column 'b' must be numeric."


# Null values in the histogram column must be ignored
def test_histogram_ignore_null_values():
    """Check that a null entry in the target column does not affect the result.

    The fixture holds ten rows split across groups "A" and "B"; the last row
    (group "A") carries ``None`` in both numeric columns. The expected bin
    labels span 1 to 9 and the expected counts add up to nine, i.e. the null
    row contributes to neither the bin edges nor the per-group counts.
    """
    # NOTE(review): `bins` comes from module scope; the five expected bin
    # labels below presumably correspond to bins == 5 -- confirm.
    group_labels = ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"]
    numeric_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, None]
    data_with_null = DataFrame(
        {
            "group": group_labels,
            "a": numeric_values,
            "b": list(numeric_values),
        }
    )

    result = histogram(data_with_null, "a", ["group"], bins)

    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 5", "5 - 7", "7 - 9"]
    expected_rows = [["A", 2, 0, 1, 1, 1], ["B", 0, 2, 0, 1, 1]]

    # One row per group; one column for the group key plus one per bin.
    assert result.shape == (2, bins + 1)
    assert result.columns.tolist() == expected_columns
    assert result.values.tolist() == expected_rows

0 comments on commit 687f159

Please sign in to comment.