refactor: rename to approx_percentile_cont

apache · Jan 29, 2022 · c216f48 · c216f48
1 parent 03a5eff
commit c216f48
Show file tree

Hide file tree

Showing 14 changed files with 129 additions and 122 deletions.
diff --git a/ballista/rust/core/proto/ballista.proto b/ballista/rust/core/proto/ballista.proto
@@ -176,7 +176,7 @@ enum AggregateFunction {
   STDDEV=11;
   STDDEV_POP=12;
   CORRELATION=13;
-  APPROX_QUANTILE = 14;
+  APPROX_PERCENTILE_CONT = 14;
 }
 
 message AggregateExprNode {

diff --git a/ballista/rust/core/src/serde/logical_plan/mod.rs b/ballista/rust/core/src/serde/logical_plan/mod.rs
@@ -1001,9 +1001,9 @@ mod roundtrip_tests {
     }
 
     #[test]
-    fn roundtrip_approx_quantile() -> Result<()> {
+    fn roundtrip_approx_percentile_cont() -> Result<()> {
         let test_expr = Expr::AggregateFunction {
-            fun: aggregates::AggregateFunction::ApproxQuantile,
+            fun: aggregates::AggregateFunction::ApproxPercentileCont,
             args: vec![col("bananas"), lit(0.42)],
             distinct: false,
         };

diff --git a/ballista/rust/core/src/serde/logical_plan/to_proto.rs b/ballista/rust/core/src/serde/logical_plan/to_proto.rs
@@ -1074,8 +1074,8 @@ impl TryInto<protobuf::LogicalExprNode> for &Expr {
                     AggregateFunction::ApproxDistinct => {
                         protobuf::AggregateFunction::ApproxDistinct
                     }
-                    AggregateFunction::ApproxQuantile => {
-                        protobuf::AggregateFunction::ApproxQuantile
+                    AggregateFunction::ApproxPercentileCont => {
+                        protobuf::AggregateFunction::ApproxPercentileCont
                     }
                     AggregateFunction::ArrayAgg => protobuf::AggregateFunction::ArrayAgg,
                     AggregateFunction::Min => protobuf::AggregateFunction::Min,
@@ -1339,7 +1339,7 @@ impl From<&AggregateFunction> for protobuf::AggregateFunction {
             AggregateFunction::Stddev => Self::Stddev,
             AggregateFunction::StddevPop => Self::StddevPop,
             AggregateFunction::Correlation => Self::Correlation,
-            AggregateFunction::ApproxQuantile => Self::ApproxQuantile,
+            AggregateFunction::ApproxPercentileCont => Self::ApproxPercentileCont,
         }
     }
 }

diff --git a/ballista/rust/core/src/serde/mod.rs b/ballista/rust/core/src/serde/mod.rs
@@ -129,8 +129,8 @@ impl From<protobuf::AggregateFunction> for AggregateFunction {
             protobuf::AggregateFunction::Stddev => AggregateFunction::Stddev,
             protobuf::AggregateFunction::StddevPop => AggregateFunction::StddevPop,
             protobuf::AggregateFunction::Correlation => AggregateFunction::Correlation,
-            protobuf::AggregateFunction::ApproxQuantile => {
-                AggregateFunction::ApproxQuantile
+            protobuf::AggregateFunction::ApproxPercentileCont => {
+                AggregateFunction::ApproxPercentileCont
             }
         }
     }

diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
@@ -1647,12 +1647,12 @@ pub fn approx_distinct(expr: Expr) -> Expr {
     }
 }
 
-/// Calculate an approximation of the specified `quantile` for `expr`.
-pub fn approx_quantile(expr: Expr, quantile: Expr) -> Expr {
+/// Calculate an approximation of the specified `percentile` for `expr`.
+pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
     Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::ApproxQuantile,
+        fun: aggregates::AggregateFunction::ApproxPercentileCont,
         distinct: false,
-        args: vec![expr, quantile],
+        args: vec![expr, percentile],
     }
 }
 

diff --git a/datafusion/src/logical_plan/mod.rs b/datafusion/src/logical_plan/mod.rs
@@ -36,8 +36,8 @@ pub use builder::{
 pub use dfschema::{DFField, DFSchema, DFSchemaRef, ToDFSchema};
 pub use display::display_schema;
 pub use expr::{
-    abs, acos, and, approx_distinct, approx_quantile, array, ascii, asin, atan, avg,
-    binary_expr, bit_length, btrim, case, ceil, character_length, chr, col,
+    abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
+    avg, binary_expr, bit_length, btrim, case, ceil, character_length, chr, col,
     columnize_expr, combine_filters, concat, concat_ws, cos, count, count_distinct,
     create_udaf, create_udf, date_part, date_trunc, digest, exp, exprlist_to_fields,
     floor, in_list, initcap, left, length, lit, lit_timestamp_nano, ln, log10, log2,

diff --git a/datafusion/src/physical_plan/aggregates.rs b/datafusion/src/physical_plan/aggregates.rs
@@ -80,8 +80,8 @@ pub enum AggregateFunction {
     CovariancePop,
     /// Correlation
     Correlation,
-    /// Approximate quantile function
-    ApproxQuantile,
+    /// Approximate continuous percentile function
+    ApproxPercentileCont,
 }
 
 impl fmt::Display for AggregateFunction {
@@ -112,7 +112,7 @@ impl FromStr for AggregateFunction {
             "covar_samp" => AggregateFunction::Covariance,
             "covar_pop" => AggregateFunction::CovariancePop,
             "corr" => AggregateFunction::Correlation,
-            "approx_quantile" => AggregateFunction::ApproxQuantile,
+            "approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
             _ => {
                 return Err(DataFusionError::Plan(format!(
                     "There is no built-in function named {}",
@@ -160,7 +160,7 @@ pub fn return_type(
             coerced_data_types[0].clone(),
             true,
         )))),
-        AggregateFunction::ApproxQuantile => Ok(coerced_data_types[0].clone()),
+        AggregateFunction::ApproxPercentileCont => Ok(coerced_data_types[0].clone()),
     }
 }
 
@@ -335,17 +335,18 @@ pub fn create_aggregate_expr(
                 "CORR(DISTINCT) aggregations are not available".to_string(),
             ));
         }
-        (AggregateFunction::ApproxQuantile, false) => {
-            Arc::new(expressions::ApproxQuantile::new(
-                // Pass in the desired quantile expr
+        (AggregateFunction::ApproxPercentileCont, false) => {
+            Arc::new(expressions::ApproxPercentileCont::new(
+                // Pass in the desired percentile expr
                 coerced_phy_exprs,
                 name,
                 return_type,
             )?)
         }
-        (AggregateFunction::ApproxQuantile, true) => {
+        (AggregateFunction::ApproxPercentileCont, true) => {
             return Err(DataFusionError::NotImplemented(
-                "approx_quantile(DISTINCT) aggregations are not available".to_string(),
+                "approx_percentile_cont(DISTINCT) aggregations are not available"
+                    .to_string(),
             ));
         }
     })
@@ -406,8 +407,8 @@ pub fn signature(fun: &AggregateFunction) -> Signature {
         AggregateFunction::Correlation => {
             Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable)
         }
-        AggregateFunction::ApproxQuantile => Signature::one_of(
-            // Accept any numeric value paired with a float64 quantile
+        AggregateFunction::ApproxPercentileCont => Signature::one_of(
+            // Accept any numeric value paired with a float64 percentile
             NUMERICS
                 .iter()
                 .map(|t| TypeSignature::Exact(vec![t.clone(), DataType::Float64]))
@@ -421,8 +422,8 @@ pub fn signature(fun: &AggregateFunction) -> Signature {
 mod tests {
     use super::*;
     use crate::physical_plan::expressions::{
-        ApproxDistinct, ApproxQuantile, ArrayAgg, Avg, Correlation, Count, Covariance,
-        DistinctArrayAgg, DistinctCount, Max, Min, Stddev, Sum, Variance,
+        ApproxDistinct, ApproxPercentileCont, ArrayAgg, Avg, Correlation, Count,
+        Covariance, DistinctArrayAgg, DistinctCount, Max, Min, Stddev, Sum, Variance,
     };
     use crate::{error::Result, scalar::ScalarValue};
 
@@ -539,7 +540,7 @@ mod tests {
     }
 
     #[test]
-    fn test_agg_approx_quantile_phy_expr() {
+    fn test_agg_approx_percentile_phy_expr() {
         for data_type in NUMERICS {
             let input_schema =
                 Schema::new(vec![Field::new("c1", data_type.clone(), true)]);
@@ -550,15 +551,15 @@ mod tests {
                 Arc::new(expressions::Literal::new(ScalarValue::Float64(Some(0.2)))),
             ];
             let result_agg_phy_exprs = create_aggregate_expr(
-                &AggregateFunction::ApproxQuantile,
+                &AggregateFunction::ApproxPercentileCont,
                 false,
                 &input_phy_exprs[..],
                 &input_schema,
                 "c1",
             )
             .expect("failed to create aggregate expr");
 
-            assert!(result_agg_phy_exprs.as_any().is::<ApproxQuantile>());
+            assert!(result_agg_phy_exprs.as_any().is::<ApproxPercentileCont>());
             assert_eq!("c1", result_agg_phy_exprs.name());
             assert_eq!(
                 Field::new("c1", data_type.clone(), false),
@@ -568,7 +569,7 @@ mod tests {
     }
 
     #[test]
-    fn test_agg_approx_quantile_invalid_phy_expr() {
+    fn test_agg_approx_percentile_invalid_phy_expr() {
         for data_type in NUMERICS {
             let input_schema =
                 Schema::new(vec![Field::new("c1", data_type.clone(), true)]);
@@ -579,13 +580,13 @@ mod tests {
                 Arc::new(expressions::Literal::new(ScalarValue::Float64(Some(4.2)))),
             ];
             let err = create_aggregate_expr(
-                &AggregateFunction::ApproxQuantile,
+                &AggregateFunction::ApproxPercentileCont,
                 false,
                 &input_phy_exprs[..],
                 &input_schema,
                 "c1",
             )
-            .expect_err("should fail due to invalid quantile");
+            .expect_err("should fail due to invalid percentile");
 
             assert!(matches!(err, DataFusionError::Plan(_)));
         }

diff --git a/datafusion/src/physical_plan/coercion_rule/aggregate_rule.rs b/datafusion/src/physical_plan/coercion_rule/aggregate_rule.rs
@@ -28,7 +28,7 @@ use crate::physical_plan::functions::{Signature, TypeSignature};
 use crate::physical_plan::PhysicalExpr;
 use crate::{
     arrow::datatypes::Schema,
-    physical_plan::expressions::is_approx_quantile_supported_arg_type,
+    physical_plan::expressions::is_approx_percentile_cont_supported_arg_type,
 };
 use arrow::datatypes::DataType;
 use std::ops::Deref;
@@ -139,16 +139,16 @@ pub(crate) fn coerce_types(
             }
             Ok(input_types.to_vec())
         }
-        AggregateFunction::ApproxQuantile => {
-            if !is_approx_quantile_supported_arg_type(&input_types[0]) {
+        AggregateFunction::ApproxPercentileCont => {
+            if !is_approx_percentile_cont_supported_arg_type(&input_types[0]) {
                 return Err(DataFusionError::Plan(format!(
                     "The function {:?} does not support inputs of type {:?}.",
                     agg_fun, input_types[0]
                 )));
             }
             if !matches!(input_types[1], DataType::Float64) {
                 return Err(DataFusionError::Plan(format!(
-                    "The quantile argument for {:?} must be Float64, not {:?}.",
+                    "The percentile argument for {:?} must be Float64, not {:?}.",
                     agg_fun, input_types[1]
                 )));
             }
@@ -324,7 +324,7 @@ mod tests {
             }
         }
 
-        // ApproxQuantile input types
+        // ApproxPercentileCont input types
         let input_types = vec![
             vec![DataType::Int8, DataType::Float64],
             vec![DataType::Int16, DataType::Float64],
@@ -338,9 +338,13 @@ mod tests {
             vec![DataType::Float64, DataType::Float64],
         ];
         for input_type in &input_types {
-            let signature = aggregates::signature(&AggregateFunction::ApproxQuantile);
-            let result =
-                coerce_types(&AggregateFunction::ApproxQuantile, input_type, &signature);
+            let signature =
+                aggregates::signature(&AggregateFunction::ApproxPercentileCont);
+            let result = coerce_types(
+                &AggregateFunction::ApproxPercentileCont,
+                input_type,
+                &signature,
+            );
             assert_eq!(*input_type, result.unwrap());
         }
     }