Skip to content

Commit

Permalink
feat: add outlier crate with DBSCAN implementation (#79)
Browse files Browse the repository at this point in the history
  • Loading branch information
sd2k authored Jun 5, 2024
1 parent 8b474c8 commit 6c57192
Show file tree
Hide file tree
Showing 12 changed files with 2,116 additions and 1 deletion.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ augurs-core = { version = "0.1.2", path = "crates/augurs-core" }
augurs-ets = { version = "0.1.2", path = "crates/augurs-ets" }
augurs-forecaster = { version = "0.1.2", path = "crates/augurs-forecaster" }
augurs-mstl = { version = "0.1.2", path = "crates/augurs-mstl" }
augurs-outlier = { version = "0.1.2", path = "crates/augurs-outlier" }
augurs-seasons = { version = "0.1.2", path = "crates/augurs-seasons" }
augurs-testing = { path = "crates/augurs-testing" }

distrs = "0.2.1"
itertools = "0.13.0"
serde = { version = "1.0.166", features = ["derive"] }
thiserror = "1.0.40"
tinyvec = "1.6.0"
tracing = "0.1.37"

assert_approx_eq = "1.1.0"
Expand Down
3 changes: 2 additions & 1 deletion crates/augurs-js/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ augurs-core = { workspace = true }
augurs-ets = { workspace = true, features = ["mstl"] }
augurs-forecaster.workspace = true
augurs-mstl = { workspace = true }
augurs-outlier = { workspace = true }
augurs-seasons = { workspace = true }
# The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires
Expand All @@ -33,5 +34,5 @@ js-sys = "0.3.64"
serde.workspace = true
serde-wasm-bindgen = "0.6.0"
tracing-wasm = { version = "0.2.1", optional = true }
tsify = { version = "0.4.5", default_features = false, features = ["js"] }
tsify = { version = "0.4.5", default-features = false, features = ["js"] }
wasm-bindgen = "0.2.87"
1 change: 1 addition & 0 deletions crates/augurs-js/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use wasm_bindgen::prelude::*;
mod changepoints;
pub mod ets;
pub mod mstl;
mod outlier;
pub mod seasons;

/// Initialize the logger and panic hook.
Expand Down
279 changes: 279 additions & 0 deletions crates/augurs-js/src/outlier.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
use std::collections::HashSet;

use augurs_outlier::OutlierDetector as _;
use js_sys::Float64Array;
use serde::{Deserialize, Serialize};
use tsify::Tsify;

use wasm_bindgen::prelude::*;

// Enums representing outlier detectors and 'loaded' outlier detectors
// (i.e. detectors that have already preprocessed some data and are
// ready to detect).

/// The outlier detection algorithms the bindings can wrap, before any
/// data has been preprocessed.
#[derive(Debug)]
enum Detector {
/// A detector backed by `augurs_outlier::DBSCANDetector`.
Dbscan(augurs_outlier::DBSCANDetector),
}

impl Detector {
    /// Preprocess the data for the detector.
    ///
    /// `y` is a flat buffer holding every series back to back. It is copied
    /// into a `Vec` and split into consecutive series of `n_timestamps`
    /// values each. If the buffer length is not a multiple of `n_timestamps`,
    /// the final series is shorter than the others.
    ///
    /// This is provided as a separate method to allow for the
    /// preprocessed data to be cached in the future.
    ///
    /// # Panics
    ///
    /// Panics if `n_timestamps` is zero, because `slice::chunks` rejects a
    /// chunk size of zero.
    fn preprocess(&self, y: Float64Array, n_timestamps: usize) -> LoadedDetector {
        match self {
            Self::Dbscan(detector) => {
                let values = y.to_vec();
                let series = split_series(&values, n_timestamps);
                let data = detector.preprocess(&series);
                LoadedDetector::Dbscan {
                    // The loaded detector stores its own copy of the detector.
                    detector: detector.clone(),
                    data,
                }
            }
        }
    }

    /// Preprocess and perform outlier detection on the data.
    ///
    /// `y` and `n_timestamps` are interpreted exactly as in `preprocess`;
    /// the preprocessed data is used once and then discarded.
    ///
    /// # Panics
    ///
    /// Panics if `n_timestamps` is zero, because `slice::chunks` rejects a
    /// chunk size of zero.
    fn detect(&self, y: Float64Array, n_timestamps: usize) -> OutlierResult {
        match self {
            Self::Dbscan(detector) => {
                let values = y.to_vec();
                let series = split_series(&values, n_timestamps);
                let data = detector.preprocess(&series);
                detector.detect(&data).into()
            }
        }
    }
}

/// Split a flat buffer into consecutive series of `n_timestamps` values.
///
/// The returned slices borrow from `values`. Panics if `n_timestamps` is zero.
fn split_series(values: &[f64], n_timestamps: usize) -> Vec<&[f64]> {
    values.chunks(n_timestamps).collect()
}

/// A detector paired with the preprocessed data it should run against.
#[derive(Debug)]
enum LoadedDetector {
/// A DBSCAN detector together with its preprocessed data.
Dbscan {
/// The detector used to run detection on `data`.
detector: augurs_outlier::DBSCANDetector,
/// The output of the detector's `preprocess` step, kept so that
/// detection can be re-run without preprocessing again.
data: <augurs_outlier::DBSCANDetector as augurs_outlier::OutlierDetector>::PreprocessedData,
},
}

impl LoadedDetector {
    /// Run outlier detection against the stored preprocessed data.
    fn detect(&self) -> augurs_outlier::OutlierResult {
        // There is only one variant, so the pattern is irrefutable.
        let Self::Dbscan { detector, data } = self;
        detector.detect(data)
    }
}

// The public API for the outlier detector, exposed via the Javascript bindings.

/// Options for the DBSCAN outlier detector.
///
/// The `from_wasm_abi` attribute lets this type be received as an argument
/// from JavaScript.
#[derive(Debug, Default, Deserialize, Tsify)]
#[tsify(from_wasm_abi)]
pub struct DBSCANDetectorOptions {
/// A scale-invariant sensitivity parameter.
///
/// This must be in (0, 1) and will be used to estimate a sensible
/// value of epsilon based on the data.
///
/// Note that the derived `Default` yields `0.0`, which lies outside
/// that range, so callers are expected to set this explicitly.
pub sensitivity: f64,
}

/// Options for the MAD outlier detector.
///
/// NOTE(review): nothing in this file constructs a MAD detector yet; these
/// options are only accepted by `OutlierDetectorOptions::Mad`.
#[derive(Debug, Default, Deserialize, Tsify)]
#[tsify(from_wasm_abi)]
pub struct MADDetectorOptions {
/// A scale-invariant sensitivity parameter.
///
/// This must be in (0, 1).
///
/// NOTE(review): the previous wording ("used to estimate a sensible value
/// of epsilon") appears to have been copied from the DBSCAN options;
/// confirm how MAD uses this value once the detector is implemented.
pub sensitivity: f64,
}

/// Options for outlier detectors.
///
/// Each variant carries the options for one detector type. No serde tagging
/// attribute is set, so the default externally tagged representation
/// applies: the variant is selected by the key `"dbscan"` or `"mad"`.
#[derive(Debug, Deserialize, Tsify)]
#[tsify(from_wasm_abi)]
pub enum OutlierDetectorOptions {
/// Options for a DBSCAN detector.
#[serde(rename = "dbscan")]
Dbscan(DBSCANDetectorOptions),
/// Options for a MAD detector.
#[serde(rename = "mad")]
Mad(MADDetectorOptions),
}

/// A detector for detecting outlying time series in a group of series.
///
/// Instances are created through the associated constructor functions
/// (currently `dbscan`) rather than by building the struct directly.
#[derive(Debug)]
#[wasm_bindgen]
pub struct OutlierDetector {
// Hide the internal detector type from the public API.
detector: Detector,
}

#[wasm_bindgen]
impl OutlierDetector {
    /// Build an outlier detector that uses the DBSCAN algorithm.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying detector rejects
    /// `options.sensitivity`.
    #[wasm_bindgen]
    pub fn dbscan(options: DBSCANDetectorOptions) -> Result<OutlierDetector, JsError> {
        let dbscan = augurs_outlier::DBSCANDetector::with_sensitivity(options.sensitivity)?;
        let detector = Detector::Dbscan(dbscan);
        Ok(Self { detector })
    }

    /// Find the outlying time series in a group of series.
    ///
    /// `y` is a flat array containing every series back to back, and
    /// `n_timestamps` is the number of values in each series.
    ///
    /// Note: when the same data will be checked more than once, call
    /// `preprocess` first and then `detect` on the returned
    /// `LoadedOutlierDetector`, so the preprocessing work is only done once.
    #[wasm_bindgen]
    pub fn detect(&self, y: Float64Array, n_timestamps: usize) -> OutlierResult {
        let inner = &self.detector;
        inner.detect(y, n_timestamps)
    }

    /// Preprocess the data and return a 'loaded' outlier detector.
    ///
    /// The loaded detector keeps the preprocessed data, so it can detect
    /// outliers repeatedly without preprocessing again. `y` and
    /// `n_timestamps` have the same meaning as in `detect`.
    #[wasm_bindgen]
    pub fn preprocess(&self, y: Float64Array, n_timestamps: usize) -> LoadedOutlierDetector {
        let detector = self.detector.preprocess(y, n_timestamps);
        LoadedOutlierDetector { detector }
    }
}

/// A 'loaded' outlier detector, ready to detect outliers.
///
/// This is returned by the `preprocess` method of `OutlierDetector`,
/// and holds the preprocessed data for the detector.
#[derive(Debug)]
#[wasm_bindgen]
pub struct LoadedOutlierDetector {
// The internal detector together with its preprocessed data; kept
// private so the concrete type is not part of the public API.
detector: LoadedDetector,
}

#[wasm_bindgen]
impl LoadedOutlierDetector {
#[wasm_bindgen]
pub fn detect(&self) -> OutlierResult {
self.detector.detect().into()
}

/// Update the detector with new options.
///
/// # Errors
///
/// This method will return an error if the detector and options types
/// are incompatible.
#[wasm_bindgen(js_name = "updateDetector")]
pub fn update_detector(&mut self, options: OutlierDetectorOptions) -> Result<(), JsError> {
match (&mut self.detector, options) {
(
LoadedDetector::Dbscan {
ref mut detector, ..
},
OutlierDetectorOptions::Dbscan(options),
) => {
// This isn't ideal because it doesn't maintain any other state of the detector,
// but it's the best we can do without adding an `update` method to the `OutlierDetector`
// trait, which would in turn require some sort of config associated type.
let _ = std::mem::replace(
detector,
augurs_outlier::DBSCANDetector::with_sensitivity(options.sensitivity)?,
);
}
_ => return Err(JsError::new("Mismatch between detector and options")),
}
Ok(())
}
}

/// A band indicating the min and max value considered outlying
/// at each timestamp.
///
/// This is the serializable counterpart of `augurs_outlier::Band`.
// NOTE(review): `min` and `max` presumably hold one entry per timestamp and
// have equal length; nothing in this file enforces that - confirm upstream.
#[derive(Debug, Clone, Serialize, Tsify)]
#[tsify(into_wasm_abi)]
struct ClusterBand {
/// The minimum value considered outlying at each timestamp.
min: Vec<f64>,
/// The maximum value considered outlying at each timestamp.
max: Vec<f64>,
}

impl From<augurs_outlier::Band> for ClusterBand {
    /// Move the bounds out of the library's band without copying them.
    fn from(band: augurs_outlier::Band) -> Self {
        let (min, max) = (band.min, band.max);
        Self { min, max }
    }
}

/// A potentially outlying series.
///
/// Field names are serialized in camelCase (`isOutlier`,
/// `outlierIntervals`, `scores`).
#[derive(Debug, Clone, Serialize, Tsify)]
#[serde(rename_all = "camelCase")]
#[tsify(into_wasm_abi)]
struct OutlierSeries {
/// Whether the series is an outlier for at least one of the samples.
is_outlier: bool,
/// The intervals of the series that are considered outliers.
outlier_intervals: Vec<OutlierInterval>,
/// The outlier scores of the series for each sample.
scores: Vec<f64>,
}

impl From<augurs_outlier::Series> for OutlierSeries {
    /// Convert a per-series result from the library into its serializable form.
    fn from(series: augurs_outlier::Series) -> Self {
        // The flat index list becomes a list of explicit start/end intervals.
        let outlier_intervals = convert_intervals(series.outlier_intervals);
        Self {
            is_outlier: series.is_outlier,
            outlier_intervals,
            scores: series.scores,
        }
    }
}

/// An interval for which a series is outlying.
#[derive(Debug, Clone, Serialize, Tsify)]
#[tsify(into_wasm_abi)]
struct OutlierInterval {
/// The start index of the interval.
start: usize,
/// The end index of the interval, if any.
///
/// This is `None` when the source index list has a start with no
/// matching end (see `convert_intervals`).
// NOTE(review): presumably that means the series is still outlying at its
// final sample - confirm against `augurs_outlier::OutlierIntervals`.
end: Option<usize>,
}

/// Convert the library's flat list of interval indices into explicit intervals.
///
/// The indices are read in pairs: the first value of each pair is the start
/// of an interval and the second is its end. If the list has an odd number of
/// entries, the final interval has a start but no end (`end` is `None`).
/// An empty list produces an empty `Vec`.
fn convert_intervals(intervals: augurs_outlier::OutlierIntervals) -> Vec<OutlierInterval> {
    // `chunks(2)` reports an exact size, so `collect` allocates space for
    // every interval up front, including a trailing unpaired start.
    intervals
        .indices
        .chunks(2)
        .map(|pair| OutlierInterval {
            start: pair[0],
            end: pair.get(1).copied(),
        })
        .collect()
}

/// The result of applying an outlier detection algorithm to a group of time series.
///
/// Field names are serialized in camelCase (`outlyingSeries`,
/// `seriesResults`, `clusterBand`).
#[derive(Debug, Serialize, Tsify)]
#[serde(rename_all = "camelCase")]
#[tsify(into_wasm_abi)]
pub struct OutlierResult {
/// The indexes of the series considered outliers.
///
/// This is a `HashSet`, whose iteration order is unspecified, so
/// consumers should not rely on the order of the serialized indexes.
outlying_series: HashSet<usize>,
/// The results of the detection for each series.
series_results: Vec<OutlierSeries>,
/// The band indicating the min and max value considered outlying
/// at each timestamp.
cluster_band: ClusterBand,
}

impl From<augurs_outlier::OutlierResult> for OutlierResult {
fn from(r: augurs_outlier::OutlierResult) -> Self {
Self {
outlying_series: r.outlying_series,
series_results: r.series_results.into_iter().map(Into::into).collect(),
cluster_band: r.cluster_band.into(),
}
}
}
19 changes: 19 additions & 0 deletions crates/augurs-outlier/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Manifest for the `augurs-outlier` crate. All package metadata other than
# the name is inherited from the workspace.
[package]
name = "augurs-outlier"
license.workspace = true
authors.workspace = true
documentation.workspace = true
repository.workspace = true
version.workspace = true
edition.workspace = true
keywords.workspace = true

[dependencies]
# Optional: enabled by the `parallel` feature below.
rayon = { version = "1.10.0", optional = true }
rustc-hash = "1.1.0"
# Optional: no feature in this manifest enables it explicitly.
serde = { workspace = true, features = ["derive"], optional = true }
tinyvec = { workspace = true, features = ["std"] }
tracing.workspace = true

[features]
# Turns on the optional `rayon` dependency.
parallel = ["rayon"]
1 change: 1 addition & 0 deletions crates/augurs-outlier/LICENSE-APACHE
1 change: 1 addition & 0 deletions crates/augurs-outlier/LICENSE-MIT
31 changes: 31 additions & 0 deletions crates/augurs-outlier/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Outlier detection.

This crate provides implementations of time series _outlier detection_, the problem of determining whether one time series behaves differently to others in a group. (This is different to _anomaly detection_, which aims to determine whether one or more samples within a single time series appear to be different.)

Two implementations are planned:

- DBSCAN: implemented
- Median Absolute Deviation (MAD): not yet implemented (see [GitHub issue](https://github.com/grafana/augurs/issues/82))

# Example

```rust
use augurs_outlier::{OutlierDetector, DBSCANDetector};

// Each slice inside `data` is a time series.
// The third one behaves differently at indexes 2 and 3.
let data: &[&[f64]] = &[
&[1.0, 2.0, 1.5, 2.3],
&[1.9, 2.2, 1.2, 2.4],
&[1.5, 2.1, 6.4, 8.5],
];
let detector = DBSCANDetector::with_sensitivity(0.5)
.expect("sensitivity is between 0.0 and 1.0");
let processed = detector.preprocess(data);
let outliers = detector.detect(&processed);

assert_eq!(outliers.outlying_series.len(), 1);
assert!(outliers.outlying_series.contains(&2));
assert!(outliers.series_results[2].is_outlier);
assert_eq!(outliers.series_results[2].scores, vec![0.0, 0.0, 1.0, 1.0]);
```
Loading

0 comments on commit 6c57192

Please sign in to comment.