Skip to content

Commit

Permalink
feat($percentile): Implemented $percentile accumulator and expression…
Browse files Browse the repository at this point in the history
… operator. Closes #378.
  • Loading branch information
kofrasa committed Oct 26, 2023
1 parent c406742 commit 511160b
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ export enum OperatorType {
}

export type AccumulatorOperator<R = AnyVal> = (
collection: RawObject[],
collection: RawArray,
expr: AnyVal,
options: Options
) => R;
Expand Down
4 changes: 2 additions & 2 deletions src/lazy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,9 @@ export class Iterator {
* Returns a new lazy object with results of the transformation
* The entire sequence is realized.
*
* @param {Function} fn Tranform function of type (Array) => (Any)
* @param {Callback<Source, RawArray>} fn Transform function of type (Array) => (Any)
*/
transform(fn: Callback<Source>): Iterator {
transform(fn: Callback<Source, RawArray>): Iterator {
const self = this;
let iter: Iterator;
return Lazy(() => {
Expand Down
1 change: 1 addition & 0 deletions src/operators/accumulator/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export * from "./maxN";
export * from "./mergeObjects";
export * from "./min";
export * from "./minN";
export * from "./percentile";
export * from "./push";
export * from "./stdDevPop";
export * from "./stdDevSamp";
Expand Down
37 changes: 37 additions & 0 deletions src/operators/accumulator/percentile.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// https://www.mongodb.com/docs/manual/reference/operator/aggregation/percentile/
import { AccumulatorOperator, Options } from "../../core";
import { AnyVal, RawArray, RawObject } from "../../types";
import { assert, isNumber } from "../../util";
import { $push } from "./push";

/**
 * Returns an array of scalar values that correspond to specified percentile values.
 *
 * The values produced by `expr.input` for each item of the collection are
 * gathered with `$push`; anything rejected by `isNumber` is discarded and the
 * remainder is ordered numerically. Entries of `expr.p` rejected by
 * `isNumber` are discarded as well, so the result has one element per numeric
 * entry of `p`, in the order they were given.
 *
 * Every percentile is checked with `assert` to satisfy `0 < p <= 1`.
 *
 * @param collection The collection of objects.
 * @param expr The operator expression: `input` selects the values, `p` lists the percentiles.
 * @param options Options to use for processing.
 * @returns The computed value for each numeric percentile in `expr.p`.
 */
export const $percentile: AccumulatorOperator<number[]> = (
  collection: RawObject[],
  expr: { input: AnyVal; p: RawArray },
  options: Options
): number[] => {
  // MongoDB uses the t-digest algorithm to estimate percentiles.
  // Since this library expects all data in memory we compute percentiles using linear interpolation method.
  // see https://en.wikipedia.org/wiki/Percentile#The_linear_interpolation_between_closest_ranks_method
  //
  // An explicit numeric comparator is required: Array.prototype.sort() without one
  // compares elements as strings, which would order 10 before 9 and corrupt the ranks.
  const X = $push(collection, expr.input, options)
    .filter(isNumber)
    .sort((a, b) => a - b);
  const centiles = $push(expr.p, "$$CURRENT", options).filter(isNumber);
  return centiles.map(p => {
    assert(
      p > 0 && p <= 1,
      `percentile value must be between 0 and 1 (inclusive): found ${p}.`
    );
    // compute rank for the percentile (1-based: p = 1 maps to X.length)
    const r = p * (X.length - 1) + 1;
    // get the integer part
    const ri = Math.floor(r);
    // Integral rank: take the element at that rank directly.
    // Fractional rank: start from X[ri] and add the fractional part times the gap
    // to X[ri + 1]; the `|| 0` replaces the NaN gap with zero when X[ri + 1] is undefined.
    // NOTE(review): for a 1-based rank the referenced method would read X[ri - 1] and
    // X[ri]; the indices are left as-is because the existing test expectations
    // (e.g. 67 for p = 0.9 over [60, 60, 62, 64, 67]) rely on them — confirm intent.
    return r === ri ? X[r - 1] : X[ri] + (r % 1) * (X[ri + 1] - X[ri] || 0);
  });
};
4 changes: 2 additions & 2 deletions src/operators/accumulator/push.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import {
computeValue,
Options
} from "../../core";
import { AnyVal, RawArray, RawObject } from "../../types";
import { AnyVal, RawArray } from "../../types";
import { isNil } from "../../util";

/**
Expand All @@ -16,7 +16,7 @@ import { isNil } from "../../util";
* @returns {Array|*}
*/
export const $push: AccumulatorOperator<RawArray> = (
collection: RawObject[],
collection: RawArray,
expr: AnyVal,
options: Options
): RawArray => {
Expand Down
1 change: 1 addition & 0 deletions src/operators/expression/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export * from "./date";
export * from "./literal";
export * from "./misc";
export * from "./object";
export * from "./percentile";
export * from "./set";
export * from "./string";
export * from "./trignometry";
Expand Down
20 changes: 20 additions & 0 deletions src/operators/expression/percentile.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { computeValue, ExpressionOperator, Options } from "../../core";
import { AnyVal, RawArray, RawObject } from "../../types";
import { $percentile as __percentile } from "../accumulator/percentile";

/**
 * Expression form of `$percentile`.
 *
 * Evaluates `expr.input` against the current object and hands the resulting
 * array to the accumulator implementation, which does the actual percentile
 * computation.
 *
 * @param obj The current object
 * @param expr The operator expression (`input` and `p`)
 * @param options Options to use for processing
 * @returns {Array<number>}
 */
export const $percentile: ExpressionOperator<number[]> = (
  obj: RawObject,
  expr: { input: AnyVal; p: RawArray },
  options: Options
): number[] => {
  // Resolve the input expression first; the result is treated as an array of values.
  const values = computeValue(obj, expr.input, null, options) as RawArray;
  // Keep every other field of the expression, but point `input` at "$$CURRENT"
  // since the resolved array itself is now passed as the collection.
  const accumulatorExpr = { ...expr, input: "$$CURRENT" };
  return __percentile(values, accumulatorExpr, options);
};
133 changes: 133 additions & 0 deletions test/operators/accumulator/percentile.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import { aggregate } from "../../../src";
import { initOptions, ProcessingMode } from "../../../src/core";
import { $percentile } from "../../../src/operators/accumulator";
import { DEFAULT_OPTS, testPath } from "../../support";

// Options shared by every case below: the support defaults with the
// processing mode overridden to CLONE_INPUT.
const options = initOptions({
...DEFAULT_OPTS,
processingMode: ProcessingMode.CLONE_INPUT
});

// Test suite for the $percentile operator, exercised both by calling the
// accumulator function directly and through aggregation pipelines.
describe(testPath("accumulator/percentile"), () => {
// Fixture: five student records, each with three test scores.
const input = [
{ studentId: "2345", test01: 62, test02: 81, test03: 80 },
{ studentId: "2356", test01: 60, test02: 83, test03: 79 },
{ studentId: "2358", test01: 67, test02: 82, test03: 78 },
{ studentId: "2367", test01: 64, test02: 72, test03: 77 },
{ studentId: "2369", test01: 60, test02: 53, test03: 72 }
];
// test01 values in ascending order: 60 60 62 64 67
it("Calculate a Single Value", () => {
// Direct call to the accumulator with a single percentile.
const result = $percentile(
input,
{
input: "$test01",
p: [0.95]
},
options
);

expect(result).toEqual([67]);
});

describe("Calculate Multiple Values", () => {
// One case per score field; each row is [input expression, expected values]
// for the percentiles 0.5, 0.75, 0.9 and 0.95.
it.each([
["$test01", [62, 64, 67, 67]],
["$test02", [81, 82, 83, 83]],
["$test03", [78, 79, 80, 80]]
])("should comput for %p", (expr, expected) => {
const result = $percentile(
input,
{
input: expr,
p: [0.5, 0.75, 0.9, 0.95]
},
options
);
expect(result).toEqual(expected);
});

// Same percentiles as above for test03 but listed in a different order;
// the expected output follows the order of `p`.
it("computes alternative percentiles", () => {
const result = $percentile(
input,
{
input: "$test03",
p: [0.9, 0.5, 0.75, 0.95]
},
options
);
expect(result).toEqual([80, 78, 79, 80]);
});
});

// Expression usage: within $project the input is an array built from the
// three score fields of each document, so percentiles are per-document.
it("should Use $percentile in a $project Stage", () => {
const result = aggregate(
input,
[
{
$project: {
_id: 0,
studentId: 1,
testPercentiles: {
$percentile: {
input: ["$test01", "$test02", "$test03"],
p: [0.5, 0.95],
method: "approximate"
}
}
}
}
],
options
);

expect(result).toEqual([
{ studentId: "2345", testPercentiles: [80, 81] },
{ studentId: "2356", testPercentiles: [79, 83] },
{ studentId: "2358", testPercentiles: [78, 82] },
{ studentId: "2367", testPercentiles: [72, 77] },
{ studentId: "2369", testPercentiles: [60, 72] }
]);
});

// Window usage: documents are sorted by test01 ascending and the 0.95
// percentile of test01 is computed over a range window of [-3, 3]; a
// trailing $project keeps only studentId and the computed field.
it("should Use $percentile in a $setWindowField Stage", () => {
const result = aggregate(
input,
[
{
$setWindowFields: {
sortBy: { test01: 1 },
output: {
test01_95percentile: {
$percentile: {
input: "$test01",
p: [0.95],
method: "approximate"
},
window: {
range: [-3, 3]
}
}
}
}
},
{
$project: {
_id: 0,
studentId: 1,
test01_95percentile: 1
}
}
],
options
);

// Results come back in the sortBy order (test01 ascending).
expect(result).toEqual([
{ studentId: "2356", test01_95percentile: [62] },
{ studentId: "2369", test01_95percentile: [62] },
{ studentId: "2345", test01_95percentile: [64] },
{ studentId: "2367", test01_95percentile: [67] },
{ studentId: "2358", test01_95percentile: [67] }
]);
});
});

0 comments on commit 511160b

Please sign in to comment.