Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new approx_percentile_array function #609

Merged
merged 1 commit into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ This changelog should be updated as part of a PR if the work is worth noting (mo

This allows users to join two timevectors with the following semantics `timevectorA -> asof(timevectorB)`. This will return records with the LOCF value from timevectorA at the timestamps from timevectorB. Specifically the returned records contain, for each value in timevectorB, {the LOCF value from timevectorA, the value from timevectorB, the timestamp from timevectorB}.

- [#609](https://github.com/timescale/timescaledb-toolkit/pull/609): New `approx_percentile_array()` function

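Users can call the new `toolkit_experimental.approx_percentile_array(percentiles, sketch)` function to get an array of approximate percentile values, one per requested percentile, from a single call, instead of having to call the accessor and rebuild the aggregate once per percentile. The same result is available through the arrow accessor: `sketch -> toolkit_experimental.approx_percentiles(percentiles)`.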

#### Bug fixes
- [#644](https://github.com/timescale/timescaledb-toolkit/pull/644): Fix bug in Candlestick aggregate and reenable partial aggregation.

Expand Down
22 changes: 22 additions & 0 deletions extension/src/accessors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,4 +545,26 @@ pub mod toolkit_experimental {
}
}
}

// Accessor payload carrying a list of percentiles.
// `len` is the number of entries and `percentile` holds exactly `len` `f64`
// values (the array length is tied to `len` by the `[f64; self.len]` field).
pg_type! {
#[derive(Debug)]
struct AccessorPercentileArray<'input> {
len: u64,
percentile: [f64; self.len],
}
}

// Generate the input/output functions for the type.
// NOTE(review): presumably a RON text representation, going by the macro name — confirm.
ron_inout_funcs!(AccessorPercentileArray);

// Build the `approx_percentiles` accessor from a list of percentiles.
// The values are stored as given, in the same order; `len` records how many
// there are.
#[pg_extern(immutable, name = "approx_percentiles")]
pub fn accessor_percentiles(unit: Vec<f64>) -> AccessorPercentileArray<'static> {
    // Read the element count before `unit` is moved into the struct below.
    let count: u64 = unit.len().try_into().unwrap();

    // NOTE(review): `flatten!` is invoked inside `unsafe`; the invariant that
    // makes this sound is not visible from here — confirm and document it.
    unsafe {
        flatten! {
            AccessorPercentileArray{
                len: count,
                percentile: unit.into(),
            }
        }
    }
}
}
193 changes: 190 additions & 3 deletions extension/src/uddsketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use uddsketch::{SketchHashKey, UDDSketch as UddSketchInternal};

use crate::{
accessors::{
AccessorApproxPercentile, AccessorApproxPercentileRank, AccessorError, AccessorMean,
AccessorNumVals,
toolkit_experimental, AccessorApproxPercentile, AccessorApproxPercentileRank,
AccessorError, AccessorMean, AccessorNumVals,
},
aggregate_utils::in_aggregate_context,
flatten,
Expand Down Expand Up @@ -74,7 +74,6 @@ pub fn percentile_agg_trans_inner(
let default_max_error = PERCENTILE_AGG_DEFAULT_ERROR;
uddsketch_trans_inner(state, default_size as _, default_max_error, value, fcinfo)
}

// PG function for merging sketches.
#[pg_extern(immutable, parallel_safe)]
pub fn uddsketch_combine(
Expand Down Expand Up @@ -582,6 +581,45 @@ pub fn uddsketch_approx_percentile<'a>(percentile: f64, sketch: UddSketch<'a>) -
)
}

// `sketch -> accessor`: estimate, for every percentile stored in the
// accessor, the value found at that percentile of `sketch`. One result is
// returned per stored percentile, in the same order.
// NOTE(review): unlike the `->` rank operator defined further down in this
// file, this operator is not marked `parallel_safe` — confirm that is intended.
#[pg_operator(immutable)]
#[opname(->)]
pub fn arrow_uddsketch_approx_percentile_array<'a>(
    sketch: UddSketch<'a>,
    percentiles: toolkit_experimental::AccessorPercentileArray<'a>,
) -> Vec<f64> {
    let requested = percentiles.percentile.as_slice();
    approx_percentile_slice(requested, sketch)
}

// For each entry of `percentiles` (each an approx_percentile in 0.0-1.0),
// approximate the value found at that percentile of `sketch`. The output has
// one element per input entry, in the order the percentiles were given.
#[pg_extern(
    immutable,
    schema = "toolkit_experimental",
    name = "approx_percentile_array"
)]
pub fn uddsketch_approx_percentile_array<'a>(
    percentiles: Vec<f64>,
    sketch: UddSketch<'a>,
) -> Vec<f64> {
    approx_percentile_slice(percentiles.iter(), sketch)
}

// Estimate the value at each requested percentile of `sketch`.
//
// `percentiles` yields the percentiles to evaluate; the returned vector holds
// one estimate per yielded item, in iteration order. An empty input produces
// an empty vector.
fn approx_percentile_slice<'a, 'b>(
    percentiles: impl IntoIterator<Item = &'b f64>,
    sketch: UddSketch<'a>,
) -> Vec<f64> {
    // `alpha` and the gamma derived from it depend only on the sketch, so
    // compute them once instead of once per requested percentile.
    let alpha = sketch.alpha;
    let gamma = uddsketch::gamma(alpha);

    percentiles
        .into_iter()
        .map(|&percentile| {
            uddsketch::estimate_quantile(
                percentile,
                alpha,
                gamma,
                sketch.count,
                // The bucket iterator is consumed by each estimate, so a fresh
                // one is built for every percentile.
                sketch.keys().zip(sketch.counts()),
            )
        })
        .collect()
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_uddsketch_approx_rank<'a>(
Expand Down Expand Up @@ -920,6 +958,155 @@ mod tests {
pct_eql(test_value.unwrap(), 9.0, test_error.unwrap());
});
}
#[pg_test]
fn test_approx_percentile_array() {
Spi::execute(|client| {
client.select(
"CREATE TABLE paa_test (device INTEGER, value DOUBLE PRECISION)",
None,
None,
);
client.select("INSERT INTO paa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, None);

let sanity = client
.select("SELECT COUNT(*) FROM paa_test", None, None)
.first()
.get_one::<i32>();
assert_eq!(Some(1010), sanity);

client.select(
"CREATE VIEW uddsketch_test AS \
SELECT uddsketch(200, 0.001, value) as approx \
FROM paa_test ",
None,
None,
);

client.select(
"CREATE VIEW percentile_agg AS \
SELECT percentile_agg(value) as approx \
FROM paa_test",
None,
None,
);

let (value, error) = client
.select(
"SELECT \
toolkit_experimental.approx_percentile_array(array[0.9,0.5,0.2], approx), \
error(approx) \
FROM uddsketch_test",
None,
None,
)
.first()
.get_two::<Vec<f64>, f64>();

let (test_value, test_error) = client
.select(
"SELECT \
toolkit_experimental.approx_percentile_array(array[0.9,0.5,0.2], approx), \
error(approx) \
FROM percentile_agg",
None,
None,
)
.first()
.get_two::<Vec<f64>, f64>();
assert!(
test_value
.as_ref()
.unwrap()
.iter()
.zip(value.unwrap())
.all(|(a, b)| { (a - b).abs() < 0.0001 }),
"Some Float value differs from expected by more than {}",
0.0001
);

apx_eql(test_error.unwrap(), error.unwrap(), 0.000001);
assert!(test_value
.unwrap()
.iter()
.zip(vec![9.0, 5.0, 2.0])
.all(|(a, b)| { matches!(pct_eql(*a, b, test_error.unwrap()), ()) }));
});
}

#[pg_test]
fn test_approx_percentile_array_arrow() {
Spi::execute(|client| {
client.select(
"CREATE TABLE paa_test (device INTEGER, value DOUBLE PRECISION)",
None,
None,
);
client.select("INSERT INTO paa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, None);

let sanity = client
.select("SELECT COUNT(*) FROM paa_test", None, None)
.first()
.get_one::<i32>();
assert_eq!(Some(1010), sanity);

client.select(
"CREATE VIEW uddsketch_test AS \
SELECT uddsketch(200, 0.001, value) as approx \
FROM paa_test ",
None,
None,
);

client.select(
"CREATE VIEW percentile_agg AS \
SELECT percentile_agg(value) as approx \
FROM paa_test",
None,
None,
);

let (value, error) = client
.select(
"SELECT \
toolkit_experimental.approx_percentile_array(array[0.9,0.5,0.2], approx), \
error(approx) \
FROM uddsketch_test",
None,
None,
)
.first()
.get_two::<Vec<f64>, f64>();

let (test_value_arrow, test_error_arrow) = client
.select(
"SELECT approx->toolkit_experimental.approx_percentiles(array[0.9,0.5,0.2]), \
error(approx) \
FROM uddsketch_test",
None,
None,
)
.first()
.get_two::<Vec<f64>, f64>();

assert!(
test_value_arrow
.as_ref()
.unwrap()
.iter()
.zip(value.as_ref().unwrap())
.all(|(a, b)| { (a - b).abs() < 0.0001 }),
"Some Float value differs from expected by more than {}",
0.0001
);

apx_eql(test_error_arrow.unwrap(), error.unwrap(), 0.000001);
assert!(test_value_arrow
.unwrap()
.iter()
.zip(vec![9.0, 5.0, 2.0])
.all(|(a, b)| { matches!(pct_eql(*a, b, test_error_arrow.unwrap()), ()) }));
});
}

#[pg_test]
fn uddsketch_io_test() {
Expand Down