Skip to content

Commit

Permalink
Fix approx_percentile_cont in presence of nulls (#255)
Browse files Browse the repository at this point in the history
* Merge

* Update datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs

Co-authored-by: Georgi Krastev <georgi.krastev@coralogix.com>

---------

Co-authored-by: Georgi Krastev <georgi.krastev@coralogix.com>
  • Loading branch information
Dandandan and joroKr21 authored Jul 30, 2024
1 parent 859b55c commit 9919d99
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
15 changes: 11 additions & 4 deletions datafusion/physical-expr/src/aggregate/approx_percentile_cont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,25 @@ use crate::aggregate::tdigest::{TDigest, DEFAULT_MAX_SIZE};
use crate::aggregate::utils::down_cast_any_ref;
use crate::expressions::format_state_name;
use crate::{AggregateExpr, PhysicalExpr};
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;

use arrow::array::{Array, RecordBatch};
use arrow::compute::{filter, is_not_null};
use arrow::{
array::{
ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
},
datatypes::{DataType, Field},
};
use arrow_array::RecordBatch;
use arrow_schema::Schema;
use datafusion_common::{
downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result,
ScalarValue,
};
use datafusion_expr::{Accumulator, ColumnarValue};
use std::{any::Any, sync::Arc};

/// APPROX_PERCENTILE_CONT aggregate expression
#[derive(Debug)]
Expand Down Expand Up @@ -383,8 +387,11 @@ impl Accumulator for ApproxPercentileAccumulator {
}

fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
let values = &values[0];
let sorted_values = &arrow::compute::sort(values, None)?;
let mut values = Arc::clone(&values[0]);
if values.nulls().is_some() {
values = filter(&values, &is_not_null(&values)?)?;
}
let sorted_values = &arrow::compute::sort(&values, None)?;
let sorted_values = ApproxPercentileAccumulator::convert_to_float(sorted_values)?;
self.digest = self.digest.merge_sorted_f64(&sorted_values);
Ok(())
Expand Down
6 changes: 6 additions & 0 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,12 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.9) AS DOUBLE) / 0.834) < 0.05
----
true

# percentile_cont_with_nulls
query I
SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (1), (2), (3), (NULL), (NULL), (NULL)) as t (v);
----
2

# csv_query_cube_avg
query TIR
SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2
Expand Down

0 comments on commit 9919d99

Please sign in to comment.