From 511160bb634220d4f62edb39b63941e0178fce33 Mon Sep 17 00:00:00 2001
From: Francis Asante
Date: Mon, 9 Oct 2023 19:48:15 -0600
Subject: [PATCH] feat($percentile): Implemented $percentile accumulator and expression operator. Closes #378.

---
Review notes (this section is ignored when the patch is applied):
 * src/operators/accumulator/percentile.ts now sorts the input values
   numerically (the default Array sort compares elements as strings) and
   accepts p = 0, which the "(inclusive)" error message already promised.
 * A regression test covering both fixes was added to the test file.
 * Hunk line counts and the diffstat were updated for those two files; the
   blob ids on their `index` lines still describe the originally recorded
   content.
 * NOTE(review): generic type arguments and the author e-mail look stripped
   from this copy (`) => R;` has no visible declaration of `R`, and the
   `transform` -/+ lines are textually identical). Restore them from commit
   511160bb before applying.

 src/core.ts                                   |   2 +-
 src/lazy.ts                                   |   4 +-
 src/operators/accumulator/index.ts            |   1 +
 src/operators/accumulator/percentile.ts       |  43 +++++
 src/operators/accumulator/push.ts             |   4 +-
 src/operators/expression/index.ts             |   1 +
 src/operators/expression/percentile.ts        |  20 +++
 test/operators/accumulator/percentile.test.ts | 146 ++++++++++++++++++
 8 files changed, 216 insertions(+), 5 deletions(-)
 create mode 100644 src/operators/accumulator/percentile.ts
 create mode 100644 src/operators/expression/percentile.ts
 create mode 100644 test/operators/accumulator/percentile.test.ts

diff --git a/src/core.ts b/src/core.ts
index ec11306a..c816f5ca 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -263,7 +263,7 @@ export enum OperatorType {
 }
 
 export type AccumulatorOperator = (
-  collection: RawObject[],
+  collection: RawArray,
   expr: AnyVal,
   options: Options
 ) => R;
diff --git a/src/lazy.ts b/src/lazy.ts
index 33825dc2..8e5a01b6 100644
--- a/src/lazy.ts
+++ b/src/lazy.ts
@@ -239,9 +239,9 @@ export class Iterator {
    * Returns a new lazy object with results of the transformation
    * The entire sequence is realized.
    *
-   * @param {Function} fn Tranform function of type (Array) => (Any)
+   * @param {Callback} fn Tranform function of type (Array) => (Any)
    */
-  transform(fn: Callback): Iterator {
+  transform(fn: Callback): Iterator {
     const self = this;
     let iter: Iterator;
     return Lazy(() => {
diff --git a/src/operators/accumulator/index.ts b/src/operators/accumulator/index.ts
index 15542e76..99479d0c 100644
--- a/src/operators/accumulator/index.ts
+++ b/src/operators/accumulator/index.ts
@@ -19,6 +19,7 @@ export * from "./maxN";
 export * from "./mergeObjects";
 export * from "./min";
 export * from "./minN";
+export * from "./percentile";
 export * from "./push";
 export * from "./stdDevPop";
 export * from "./stdDevSamp";
diff --git a/src/operators/accumulator/percentile.ts b/src/operators/accumulator/percentile.ts
new file mode 100644
index 00000000..ffcc93fa
--- /dev/null
+++ b/src/operators/accumulator/percentile.ts
@@ -0,0 +1,43 @@
+// https://www.mongodb.com/docs/manual/reference/operator/aggregation/percentile/
+import { AccumulatorOperator, Options } from "../../core";
+import { AnyVal, RawArray, RawObject } from "../../types";
+import { assert, isNumber } from "../../util";
+import { $push } from "./push";
+
+/**
+ * Returns an array of scalar values that correspond to specified percentile values.
+ *
+ * Input values and entries of `p` that fail `isNumber` are dropped before computing.
+ * Each remaining entry of `p` is asserted to be within 0 and 1 (inclusive).
+ *
+ * @param collection The collection of objects.
+ * @param expr The operator expression; `input` is evaluated for each item and `p` lists the percentiles to compute.
+ * @param options Options to use for processing.
+ * @returns One value for each numeric entry of `p`, in the same order as `p`.
+ */
+export const $percentile: AccumulatorOperator = (
+  collection: RawObject[],
+  expr: { input: AnyVal; p: RawArray },
+  options: Options
+): number[] => {
+  // MongoDB uses the t-digest algorithm to estimate percentiles.
+  // Since this library expects all data in memory we compute percentiles using linear interpolation method.
+  // see https://en.wikipedia.org/wiki/Percentile#The_linear_interpolation_between_closest_ranks_method
+  const X = $push(collection, expr.input, options)
+    .filter(isNumber)
+    // the default sort compares elements as strings ("100" < "5"), so compare numerically.
+    .sort((a, b) => a - b);
+  const centiles = $push(expr.p, "$$CURRENT", options).filter(isNumber);
+  return centiles.map(p => {
+    assert(
+      p >= 0 && p <= 1,
+      `percentile value must be between 0 and 1 (inclusive): found ${p}.`
+    );
+    // compute rank for the percentile
+    const r = p * (X.length - 1) + 1;
+    // get the integer part
+    const ri = Math.floor(r);
+    // `|| 0` zeroes the interpolation term when X[ri + 1] is undefined and the difference is NaN.
+    return r === ri ? X[r - 1] : X[ri] + (r % 1) * (X[ri + 1] - X[ri] || 0);
+  });
+};
diff --git a/src/operators/accumulator/push.ts b/src/operators/accumulator/push.ts
index 839f1ade..7e191123 100644
--- a/src/operators/accumulator/push.ts
+++ b/src/operators/accumulator/push.ts
@@ -4,7 +4,7 @@ import {
   computeValue,
   Options
 } from "../../core";
-import { AnyVal, RawArray, RawObject } from "../../types";
+import { AnyVal, RawArray } from "../../types";
 import { isNil } from "../../util";
 
 /**
@@ -16,7 +16,7 @@ import { isNil } from "../../util";
  * @returns {Array|*}
  */
 export const $push: AccumulatorOperator = (
-  collection: RawObject[],
+  collection: RawArray,
   expr: AnyVal,
   options: Options
 ): RawArray => {
diff --git a/src/operators/expression/index.ts b/src/operators/expression/index.ts
index 3e868f1b..41046946 100644
--- a/src/operators/expression/index.ts
+++ b/src/operators/expression/index.ts
@@ -9,6 +9,7 @@ export * from "./date";
 export * from "./literal";
 export * from "./misc";
 export * from "./object";
+export * from "./percentile";
 export * from "./set";
 export * from "./string";
 export * from "./trignometry";
diff --git a/src/operators/expression/percentile.ts b/src/operators/expression/percentile.ts
new file mode 100644
index 00000000..2b16398d
--- /dev/null
+++ b/src/operators/expression/percentile.ts
@@ -0,0 +1,20 @@
+import { computeValue, ExpressionOperator, Options } from "../../core";
+import { AnyVal, RawArray, RawObject } from "../../types";
+import { $percentile as __percentile } from "../accumulator/percentile";
+
+/**
+ * Returns an array of scalar values that correspond to specified percentile values.
+ *
+ * @param obj The current object
+ * @param expr The operator expression
+ * @param options Options to use for processing
+ * @returns {Array}
+ */
+export const $percentile: ExpressionOperator = (
+  obj: RawObject,
+  expr: { input: AnyVal; p: RawArray },
+  options: Options
+): number[] => {
+  const input = computeValue(obj, expr.input, null, options) as RawArray;
+  return __percentile(input, { ...expr, input: "$$CURRENT" }, options);
+};
diff --git a/test/operators/accumulator/percentile.test.ts b/test/operators/accumulator/percentile.test.ts
new file mode 100644
index 00000000..71a778b2
--- /dev/null
+++ b/test/operators/accumulator/percentile.test.ts
@@ -0,0 +1,146 @@
+import { aggregate } from "../../../src";
+import { initOptions, ProcessingMode } from "../../../src/core";
+import { $percentile } from "../../../src/operators/accumulator";
+import { DEFAULT_OPTS, testPath } from "../../support";
+
+const options = initOptions({
+  ...DEFAULT_OPTS,
+  processingMode: ProcessingMode.CLONE_INPUT
+});
+
+describe(testPath("accumulator/percentile"), () => {
+  const input = [
+    { studentId: "2345", test01: 62, test02: 81, test03: 80 },
+    { studentId: "2356", test01: 60, test02: 83, test03: 79 },
+    { studentId: "2358", test01: 67, test02: 82, test03: 78 },
+    { studentId: "2367", test01: 64, test02: 72, test03: 77 },
+    { studentId: "2369", test01: 60, test02: 53, test03: 72 }
+  ];
+  //60 60 62 64 67
+  it("Calculate a Single Value", () => {
+    const result = $percentile(
+      input,
+      {
+        input: "$test01",
+        p: [0.95]
+      },
+      options
+    );
+
+    expect(result).toEqual([67]);
+  });
+
+  it("sorts values numerically and accepts boundary percentiles", () => {
+    const result = $percentile(
+      [{ v: 5 }, { v: 10 }, { v: 100 }],
+      {
+        input: "$v",
+        p: [0, 0.5, 1]
+      },
+      options
+    );
+
+    expect(result).toEqual([5, 10, 100]);
+  });
+
+  describe("Calculate Multiple Values", () => {
+    it.each([
+      ["$test01", [62, 64, 67, 67]],
+      ["$test02", [81, 82, 83, 83]],
+      ["$test03", [78, 79, 80, 80]]
+    ])("should comput for %p", (expr, expected) => {
+      const result = $percentile(
+        input,
+        {
+          input: expr,
+          p: [0.5, 0.75, 0.9, 0.95]
+        },
+        options
+      );
+      expect(result).toEqual(expected);
+    });
+
+    it("computes alternative percentiles", () => {
+      const result = $percentile(
+        input,
+        {
+          input: "$test03",
+          p: [0.9, 0.5, 0.75, 0.95]
+        },
+        options
+      );
+      expect(result).toEqual([80, 78, 79, 80]);
+    });
+  });
+
+  it("should Use $percentile in a $project Stage", () => {
+    const result = aggregate(
+      input,
+      [
+        {
+          $project: {
+            _id: 0,
+            studentId: 1,
+            testPercentiles: {
+              $percentile: {
+                input: ["$test01", "$test02", "$test03"],
+                p: [0.5, 0.95],
+                method: "approximate"
+              }
+            }
+          }
+        }
+      ],
+      options
+    );
+
+    expect(result).toEqual([
+      { studentId: "2345", testPercentiles: [80, 81] },
+      { studentId: "2356", testPercentiles: [79, 83] },
+      { studentId: "2358", testPercentiles: [78, 82] },
+      { studentId: "2367", testPercentiles: [72, 77] },
+      { studentId: "2369", testPercentiles: [60, 72] }
+    ]);
+  });
+
+  it("should Use $percentile in a $setWindowField Stage", () => {
+    const result = aggregate(
+      input,
+      [
+        {
+          $setWindowFields: {
+            sortBy: { test01: 1 },
+            output: {
+              test01_95percentile: {
+                $percentile: {
+                  input: "$test01",
+                  p: [0.95],
+                  method: "approximate"
+                },
+                window: {
+                  range: [-3, 3]
+                }
+              }
+            }
+          }
+        },
+        {
+          $project: {
+            _id: 0,
+            studentId: 1,
+            test01_95percentile: 1
+          }
+        }
+      ],
+      options
+    );
+
+    expect(result).toEqual([
+      { studentId: "2356", test01_95percentile: [62] },
+      { studentId: "2369", test01_95percentile: [62] },
+      { studentId: "2345", test01_95percentile: [64] },
+      { studentId: "2367", test01_95percentile: [67] },
+      { studentId: "2358", test01_95percentile: [67] }
+    ]);
+  });
+});