added approx_percentile_array fn and accessor

timescale · Dec 5, 2022 · b77b988 · b77b988
1 parent 72313b1
commit b77b988
Show file tree

Hide file tree

Showing 3 changed files with 216 additions and 3 deletions.
diff --git a/Changelog.md b/Changelog.md
@@ -22,7 +22,11 @@ This changelog should be updated as part of a PR if the work is worth noting (mo
   This is a lightly tested prototype; try it out at your own risk!
 
   [Examples](docs/examples/)
+
+- [#609](https://github.com/timescale/timescaledb-toolkit/pull/609): New `approx_percentile_array()` function
 
+  Users can call the new `toolkit_experimental.approx_percentile_array(percentiles, sketch)` function to get an array of percentile results from a single call, instead of having to call the percentile accessor and rebuild the aggregate once per percentile.
+
 #### Bug fixes
 
 #### Other notable changes

diff --git a/extension/src/accessors.rs b/extension/src/accessors.rs
@@ -545,4 +545,26 @@ pub mod toolkit_experimental {
             }
         }
     }
+
+    // Accessor payload carrying the list of percentiles built by
+    // `accessor_percentiles` (exposed to SQL as `approx_percentiles`).
+    pg_type! {
+        #[derive(Debug)]
+        struct AccessorPercentileArray<'input> {
+            // Number of entries in `percentile`.
+            len: u64,
+            // The requested percentiles; the array length is taken from `len`.
+            percentile: [f64; self.len],
+        }
+    }
+
+    // NOTE(review): presumably generates the text input/output functions for
+    // the type (RON format, judging by the macro name) -- confirm against the
+    // macro definition.
+    ron_inout_funcs!(AccessorPercentileArray);
+
+    // Wraps an array of percentiles in an `AccessorPercentileArray` so it can
+    // be passed around as a single accessor value.
+    #[pg_extern(immutable, name = "approx_percentiles")]
+    pub fn accessor_percentiles(unit: Vec<f64>) -> AccessorPercentileArray<'static> {
+        // The element count is stored next to the data; the conversion panics
+        // if the length does not fit the `len` field.
+        let count = unit.len().try_into().unwrap();
+        // NOTE(review): the invariant that makes this `unsafe` block sound is
+        // not visible from here -- confirm against the `flatten!` definition.
+        unsafe {
+            flatten! {
+                AccessorPercentileArray{
+                    len: count,
+                    percentile: unit.into(),
+                }
+            }
+        }
+    }
 }
diff --git a/extension/src/uddsketch.rs b/extension/src/uddsketch.rs
@@ -6,8 +6,8 @@ use uddsketch::{SketchHashKey, UDDSketch as UddSketchInternal};
 
 use crate::{
     accessors::{
-        AccessorApproxPercentile, AccessorApproxPercentileRank, AccessorError, AccessorMean,
-        AccessorNumVals,
+        toolkit_experimental, AccessorApproxPercentile, AccessorApproxPercentileRank,
+        AccessorError, AccessorMean, AccessorNumVals,
     },
     aggregate_utils::in_aggregate_context,
     flatten,
@@ -74,7 +74,6 @@ pub fn percentile_agg_trans_inner(
     let default_max_error = PERCENTILE_AGG_DEFAULT_ERROR;
     uddsketch_trans_inner(state, default_size as _, default_max_error, value, fcinfo)
 }
-
 // PG function for merging sketches.
 #[pg_extern(immutable, parallel_safe)]
 pub fn uddsketch_combine(
@@ -582,6 +581,45 @@ pub fn uddsketch_approx_percentile<'a>(percentile: f64, sketch: UddSketch<'a>) -
     )
 }
 
+// `sketch -> accessor` form of the percentile-array lookup: estimates every
+// percentile carried by the accessor against `sketch` and returns the
+// estimates in the same order as the accessor's entries.
+//
+// Marked `parallel_safe` like the other `->` operators in this file: the body
+// only reads its two arguments.
+#[pg_operator(immutable, parallel_safe)]
+#[opname(->)]
+pub fn arrow_uddsketch_approx_percentile_array<'a>(
+    sketch: UddSketch<'a>,
+    percentiles: toolkit_experimental::AccessorPercentileArray<'a>,
+) -> Vec<f64> {
+    approx_percentile_slice(percentiles.percentile.as_slice(), sketch)
+}
+
+// Approximate the value at each given approx_percentile (0.0-1.0) in an array,
+// returning one estimate per entry in input order.
+//
+// Marked `parallel_safe` like the other functions in this file: the body only
+// reads its two arguments.
+#[pg_extern(
+    immutable,
+    parallel_safe,
+    schema = "toolkit_experimental",
+    name = "approx_percentile_array"
+)]
+pub fn uddsketch_approx_percentile_array<'a>(
+    percentiles: Vec<f64>,
+    sketch: UddSketch<'a>,
+) -> Vec<f64> {
+    approx_percentile_slice(&percentiles, sketch)
+}
+
+// Estimates the value at every percentile yielded by `percentiles` against
+// `sketch`, returning the estimates in iteration order. An empty input yields
+// an empty vector.
+fn approx_percentile_slice<'a, 'b>(
+    percentiles: impl IntoIterator<Item = &'b f64>,
+    sketch: UddSketch<'a>,
+) -> Vec<f64> {
+    // Gamma is derived from alpha alone, so compute it once instead of once
+    // per requested percentile.
+    let alpha = sketch.alpha;
+    let gamma = uddsketch::gamma(alpha);
+
+    percentiles
+        .into_iter()
+        .map(|&percentile| {
+            uddsketch::estimate_quantile(
+                percentile,
+                alpha,
+                gamma,
+                sketch.count,
+                // The bucket iterator is consumed by each estimate, so it is
+                // rebuilt for every percentile.
+                sketch.keys().zip(sketch.counts()),
+            )
+        })
+        .collect()
+}
+
 #[pg_operator(immutable, parallel_safe)]
 #[opname(->)]
 pub fn arrow_uddsketch_approx_rank<'a>(
@@ -920,6 +958,155 @@ mod tests {
             pct_eql(test_value.unwrap(), 9.0, test_error.unwrap());
         });
     }
+    // Checks that `approx_percentile_array` gives matching results for a
+    // `uddsketch` and a `percentile_agg` built over the same data, and that
+    // the results are close to the expected percentile values.
+    #[pg_test]
+    fn test_approx_percentile_array() {
+        Spi::execute(|client| {
+            client.select(
+                "CREATE TABLE paa_test (device INTEGER, value DOUBLE PRECISION)",
+                None,
+                None,
+            );
+            client.select("INSERT INTO paa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, None);
+
+            let sanity = client
+                .select("SELECT COUNT(*) FROM paa_test", None, None)
+                .first()
+                .get_one::<i32>();
+            assert_eq!(Some(1010), sanity);
+
+            client.select(
+                "CREATE VIEW uddsketch_test AS \
+                SELECT uddsketch(200, 0.001, value) as approx \
+                FROM paa_test ",
+                None,
+                None,
+            );
+
+            client.select(
+                "CREATE VIEW percentile_agg AS \
+                SELECT percentile_agg(value) as approx \
+                FROM paa_test",
+                None,
+                None,
+            );
+
+            let (value, error) = client
+                .select(
+                    "SELECT \
+                    toolkit_experimental.approx_percentile_array(array[0.9,0.5,0.2], approx), \
+                    error(approx) \
+                    FROM uddsketch_test",
+                    None,
+                    None,
+                )
+                .first()
+                .get_two::<Vec<f64>, f64>();
+
+            let (test_value, test_error) = client
+                .select(
+                    "SELECT \
+                    toolkit_experimental.approx_percentile_array(array[0.9,0.5,0.2], approx), \
+                    error(approx) \
+                    FROM percentile_agg",
+                    None,
+                    None,
+                )
+                .first()
+                .get_two::<Vec<f64>, f64>();
+
+            let value = value.unwrap();
+            let error = error.unwrap();
+            let test_value = test_value.unwrap();
+            let test_error = test_error.unwrap();
+
+            // `zip` stops at the shorter side, so without these checks an empty
+            // or truncated result would pass every comparison below vacuously.
+            let expected = [9.0, 5.0, 2.0];
+            assert_eq!(value.len(), expected.len());
+            assert_eq!(test_value.len(), expected.len());
+
+            assert!(
+                test_value
+                    .iter()
+                    .zip(&value)
+                    .all(|(a, b)| (a - b).abs() < 0.0001),
+                "Some Float value differs from expected by more than {}",
+                0.0001
+            );
+
+            apx_eql(test_error, error, 0.000001);
+            // Call `pct_eql` directly: wrapping its unit result in `matches!`
+            // always yielded true and added nothing to the check.
+            for (actual, wanted) in test_value.iter().zip(&expected) {
+                pct_eql(*actual, *wanted, test_error);
+            }
+        });
+    }
+
+    // Checks that the arrow form `sketch -> approx_percentiles(array[...])`
+    // returns the same results as `approx_percentile_array(array[...], sketch)`
+    // and that the results are close to the expected percentile values.
+    #[pg_test]
+    fn test_approx_percentile_array_arrow() {
+        Spi::execute(|client| {
+            client.select(
+                "CREATE TABLE paa_test (device INTEGER, value DOUBLE PRECISION)",
+                None,
+                None,
+            );
+            client.select("INSERT INTO paa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, None);
+
+            let sanity = client
+                .select("SELECT COUNT(*) FROM paa_test", None, None)
+                .first()
+                .get_one::<i32>();
+            assert_eq!(Some(1010), sanity);
+
+            client.select(
+                "CREATE VIEW uddsketch_test AS \
+                SELECT uddsketch(200, 0.001, value) as approx \
+                FROM paa_test ",
+                None,
+                None,
+            );
+
+            // Not queried below; kept so the executed setup stays unchanged.
+            client.select(
+                "CREATE VIEW percentile_agg AS \
+                SELECT percentile_agg(value) as approx \
+                FROM paa_test",
+                None,
+                None,
+            );
+
+            let (value, error) = client
+                .select(
+                    "SELECT \
+                    toolkit_experimental.approx_percentile_array(array[0.9,0.5,0.2], approx), \
+                    error(approx) \
+                    FROM uddsketch_test",
+                    None,
+                    None,
+                )
+                .first()
+                .get_two::<Vec<f64>, f64>();
+
+            let (test_value_arrow, test_error_arrow) = client
+                .select(
+                    "SELECT approx->toolkit_experimental.approx_percentiles(array[0.9,0.5,0.2]), \
+                    error(approx) \
+                    FROM uddsketch_test",
+                    None,
+                    None,
+                )
+                .first()
+                .get_two::<Vec<f64>, f64>();
+
+            let value = value.unwrap();
+            let error = error.unwrap();
+            let test_value_arrow = test_value_arrow.unwrap();
+            let test_error_arrow = test_error_arrow.unwrap();
+
+            // `zip` stops at the shorter side, so without these checks an empty
+            // or truncated result would pass every comparison below vacuously.
+            let expected = [9.0, 5.0, 2.0];
+            assert_eq!(value.len(), expected.len());
+            assert_eq!(test_value_arrow.len(), expected.len());
+
+            assert!(
+                test_value_arrow
+                    .iter()
+                    .zip(&value)
+                    .all(|(a, b)| (a - b).abs() < 0.0001),
+                "Some Float value differs from expected by more than {}",
+                0.0001
+            );
+
+            apx_eql(test_error_arrow, error, 0.000001);
+            // Call `pct_eql` directly: wrapping its unit result in `matches!`
+            // always yielded true and added nothing to the check.
+            for (actual, wanted) in test_value_arrow.iter().zip(&expected) {
+                pct_eql(*actual, *wanted, test_error_arrow);
+            }
+        });
+    }
 
     #[pg_test]
     fn uddsketch_io_test() {