Skip to content

Commit

Permalink
fix!: add serde derives for more types (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
sd2k authored Sep 22, 2024
1 parent d4b7e51 commit d46c954
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 85 deletions.
1 change: 1 addition & 0 deletions crates/augurs-core/src/distance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ impl std::error::Error for DistanceMatrixError {}

/// A matrix representing the distances between pairs of items.
///
/// The distances are held in a private nested `Vec<Vec<f64>>`. When the
/// `serde` feature is enabled the type can be serialized and deserialized.
// NOTE(review): the derived `Deserialize` populates `matrix` directly from the
// input, so any invariants enforced by constructors elsewhere in this file
// (see `DistanceMatrixError`) are presumably not checked when deserializing
// — confirm this is acceptable.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DistanceMatrix {
// Backing storage for the distance values; presumably one inner vector per
// item — TODO confirm against the constructor.
matrix: Vec<Vec<f64>>,
}
Expand Down
28 changes: 14 additions & 14 deletions crates/augurs-js/src/outlier.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;

use augurs_outlier::OutlierDetector as _;
use js_sys::Float64Array;
Expand Down Expand Up @@ -271,7 +271,12 @@ impl From<augurs_outlier::Series> for OutlierSeries {
fn from(s: augurs_outlier::Series) -> Self {
Self {
is_outlier: s.is_outlier,
outlier_intervals: convert_intervals(s.outlier_intervals),
outlier_intervals: s
.outlier_intervals
.intervals
.into_iter()
.map(Into::into)
.collect(),
scores: s.scores,
}
}
Expand All @@ -287,18 +292,13 @@ struct OutlierInterval {
end: Option<usize>,
}

fn convert_intervals(intervals: augurs_outlier::OutlierIntervals) -> Vec<OutlierInterval> {
let mut out = Vec::with_capacity(intervals.indices.len() / 2);
if intervals.indices.is_empty() {
return out;
}
for chunk in intervals.indices.chunks(2) {
out.push(OutlierInterval {
start: chunk[0],
end: chunk.get(1).copied(),
});
impl From<augurs_outlier::OutlierInterval> for OutlierInterval {
fn from(i: augurs_outlier::OutlierInterval) -> Self {
Self {
start: i.start,
end: i.end,
}
}
out
}

/// The result of applying an outlier detection algorithm to a group of time series.
Expand All @@ -307,7 +307,7 @@ fn convert_intervals(intervals: augurs_outlier::OutlierIntervals) -> Vec<Outlier
#[tsify(into_wasm_abi)]
pub struct OutlierOutput {
/// The indexes of the series considered outliers.
outlying_series: HashSet<usize>,
outlying_series: BTreeSet<usize>,
/// The results of the detection for each series.
series_results: Vec<OutlierSeries>,
/// The band indicating the min and max value considered outlying
Expand Down
3 changes: 3 additions & 0 deletions crates/augurs-outlier/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,8 @@ thiserror.workspace = true
tinyvec = { workspace = true, features = ["std"] }
tracing.workspace = true

[dev-dependencies]
serde_json = "1.0.128"

[features]
parallel = ["rayon"]
26 changes: 14 additions & 12 deletions crates/augurs-outlier/src/dbscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ impl SortedData {

#[cfg(test)]
mod tests {
use crate::{OutlierDetector, OutlierOutput};
use crate::{testing::flatten_intervals, OutlierDetector, OutlierOutput};

use super::*;

Expand Down Expand Up @@ -516,12 +516,13 @@ mod tests {
// in the matrix.
for (j, series) in results.series_results.iter().enumerate() {
let mut outlier_state = false;
let mut outlier_indices = series.outlier_intervals.indices.iter();
let mut next_idx = outlier_indices.next();
let outlier_indices = flatten_intervals(&series.outlier_intervals.intervals);
let mut iter = outlier_indices.iter();
let mut next_idx = iter.next();
for (i, item) in matrix.iter_mut().enumerate() {
if next_idx.map_or(false, |next_idx| i >= *next_idx) {
outlier_state = !outlier_state;
next_idx = outlier_indices.next();
next_idx = iter.next();
}
item[j] = outlier_state;
}
Expand Down Expand Up @@ -598,18 +599,19 @@ mod tests {

assert!(results.series_results[0]
.outlier_intervals
.indices
.intervals
.is_empty());
assert!(results.series_results[1]
.outlier_intervals
.indices
.intervals
.is_empty());
assert_eq!(results.series_results[2].outlier_intervals.indices[0], 40);
assert_eq!(results.series_results[2].outlier_intervals.indices[1], 42);
assert_eq!(results.series_results[2].outlier_intervals.indices[2], 140);
assert_eq!(results.series_results[2].outlier_intervals.indices[3], 142);
assert_eq!(results.series_results[2].outlier_intervals.indices[4], 240);
assert_eq!(results.series_results[2].outlier_intervals.indices[5], 242);
let indices = flatten_intervals(&results.series_results[2].outlier_intervals.intervals);
assert_eq!(indices[0], 40);
assert_eq!(indices[1], 42);
assert_eq!(indices[2], 140);
assert_eq!(indices[3], 142);
assert_eq!(indices[4], 240);
assert_eq!(indices[5], 242);
assert!(results.cluster_band.is_some());
}

Expand Down
154 changes: 98 additions & 56 deletions crates/augurs-outlier/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![doc = include_str!("../README.md")]
#![warn(missing_docs)]

use std::collections::HashSet;
use std::collections::BTreeSet;

mod dbscan;
mod error;
Expand Down Expand Up @@ -38,9 +38,13 @@ impl Band {

/// The result of applying an outlier detection algorithm to a time series.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
pub struct OutlierOutput {
/// The indexes of the series considered outliers.
pub outlying_series: HashSet<usize>,
///
/// This is a `BTreeSet` so that the indexes are always iterated (and serialized) in ascending order.
pub outlying_series: BTreeSet<usize>,

/// The results of the detection for each series.
pub series_results: Vec<Series>,
Expand Down Expand Up @@ -76,6 +80,8 @@ impl OutlierOutput {

/// A potentially outlying series.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
pub struct Series {
/// Whether the series is an outlier for at least one of the samples.
pub is_outlier: bool,
Expand Down Expand Up @@ -124,33 +130,22 @@ impl Series {
}

/// A list of outlier intervals for a single series.
// We manually implement [`Serialize`] for this struct, serializing
// just the `timestamps` field as an array.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "camelCase", transparent))]
pub struct OutlierIntervals {
/// A list of indices, where 'even' elements are the start and
/// 'odd' elements are the end of an outlier interval.
pub indices: Vec<usize>,
/// The list of outlier intervals.
pub intervals: Vec<OutlierInterval>,

/// Are we expecting a start or end timestamp to be pushed next?
#[cfg_attr(feature = "serde", serde(skip))]
expecting_end: bool,
}

impl OutlierIntervals {
// fn new(idx: usize) -> Self {
// // We're expecting at least two indices, so we might
// // as well allocate it now.
// let mut indices = Vec::with_capacity(2);
// indices.push(idx);
// Self {
// indices,
// expecting_end: true,
// }
// }

fn empty() -> Self {
Self {
indices: Vec::new(),
intervals: Vec::new(),
expecting_end: false,
}
}
Expand All @@ -160,7 +155,11 @@ impl OutlierIntervals {
!self.expecting_end,
"Expected end of outlier interval, got start"
);
self.indices.push(ts);

self.intervals.push(OutlierInterval {
start: ts,
end: None,
});
self.expecting_end = true;
}

Expand All @@ -169,11 +168,33 @@ impl OutlierIntervals {
self.expecting_end,
"Expected start of outlier interval, got end"
);
self.indices.push(ts);

match self.intervals.last_mut() {
Some(x @ OutlierInterval { end: None, .. }) => {
x.end = Some(ts);
}
_ => unreachable!("tried to add end to an open-ended interval"),
};
self.expecting_end = false;
}
}

/// A single outlier interval.
///
/// An outlier interval is a contiguous range of indices in a time series
/// where an outlier is detected.
///
/// With the `serde` feature enabled, field names are serialized in camelCase;
/// an interval that has a start but no end serializes as
/// `{"start":1,"end":null}`.
// NOTE(review): whether `end` is inclusive or exclusive is not visible from
// this definition — confirm against the detectors that populate it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
pub struct OutlierInterval {
/// The start index of the interval.
pub start: usize,
/// The end index of the interval, if it exists.
///
/// If the interval is open-ended (it has been started but no end has been
/// recorded), this will be `None`.
pub end: Option<usize>,
}

/// An outlier detection algorithm.
pub trait OutlierDetector {
/// The preprocessed data used by the outlier detection algorithm.
Expand Down Expand Up @@ -214,38 +235,59 @@ pub trait OutlierDetector {
fn detect(&self, y: &Self::PreprocessedData) -> Result<OutlierOutput, Error>;
}

// fn transpose(data: &[&[f64]]) -> Vec<Vec<f64>> {
// let mut transposed = vec![vec![]; data.len()];
// for row in data {
// transposed.reserve(data.len());
// for (i, value) in row.iter().enumerate() {
// transposed[i].push(*value);
// }
// }
// transposed
// }

// #[cfg(test)]
// mod test {
// use super::*;

// struct DummyDetector;

// impl OutlierDetector for DummyDetector {
// fn detect(&self, y: &[InputSeries<'_>]) -> OutlierResult {
// let serieses = y
// .iter()
// .map(|series| {
// let is_outlier = series.iter().any(|&x| x > 10.0);
// let scores = series.to_vec();
// Series::new(is_outlier, scores)
// })
// .collect();
// let band = Band {
// min: vec![-1.0; y[0].len()],
// max: vec![1.0; y[0].len()],
// };
// OutlierResult::new(serieses, band)
// }
// }
// }
#[cfg(test)]
mod test {
    use super::*;

    /// A trivial detector used to exercise the `OutlierDetector` trait.
    struct DummyDetector;

    impl OutlierDetector for DummyDetector {
        type PreprocessedData = Vec<Vec<f64>>;

        /// Copies every borrowed input series into an owned vector.
        fn preprocess(&self, y: &[&[f64]]) -> Result<Self::PreprocessedData, Error> {
            let owned = y.iter().copied().map(<[f64]>::to_vec).collect();
            Ok(owned)
        }

        /// Flags a series as outlying when any value exceeds 10.0 and gives
        /// every series a single open-ended interval starting at index 1.
        fn detect(&self, y: &Self::PreprocessedData) -> Result<OutlierOutput, Error> {
            let series_results = y
                .iter()
                .map(|values| {
                    let mut outlier_intervals = OutlierIntervals::empty();
                    outlier_intervals.add_start(1);
                    let is_outlier = values.iter().any(|&v| v > 10.0);
                    Series {
                        is_outlier,
                        scores: values.to_vec(),
                        outlier_intervals,
                    }
                })
                .collect();
            // The band spans the length of the first series; this indexes
            // `y[0]` and therefore panics on empty input.
            let width = y[0].len();
            let cluster_band = Band {
                min: vec![-1.0; width],
                max: vec![1.0; width],
            };
            Ok(OutlierOutput::new(series_results, Some(cluster_band)))
        }
    }

    /// Pins the JSON shape of `OutlierOutput`: camelCase keys, intervals
    /// rendered as a bare array, and an open-ended interval with `end: null`.
    #[cfg(feature = "serde")]
    #[test]
    fn serialize() {
        let mut open_interval = OutlierIntervals::empty();
        open_interval.add_start(1);

        let output = OutlierOutput {
            outlying_series: [0, 1].into_iter().collect(),
            series_results: vec![Series {
                is_outlier: true,
                scores: vec![1.0, 2.0, 3.0],
                outlier_intervals: open_interval,
            }],
            cluster_band: None,
        };

        let json = serde_json::to_string(&output).unwrap();
        let expected = r#"{"outlyingSeries":[0,1],"seriesResults":[{"isOutlier":true,"outlierIntervals":[{"start":1,"end":null}],"scores":[1.0,2.0,3.0]}],"clusterBand":null}"#;
        assert_eq!(json, expected);
    }
}
7 changes: 4 additions & 3 deletions crates/augurs-outlier/src/mad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ mod test {
use itertools::Itertools;
use rv::prelude::*;

use crate::{MADDetector, OutlierDetector};
use crate::{testing::flatten_intervals, MADDetector, OutlierDetector};

use super::Medians;

Expand Down Expand Up @@ -711,8 +711,9 @@ mod test {
);
let output = result.unwrap();
assert_eq!(output.series_results.len(), 1, "case {} failed", tc.name);
let got_intervals = &output.series_results[0].outlier_intervals.indices;
assert_eq!(intervals, got_intervals, "case {} failed", tc.name);
let got_intervals =
flatten_intervals(&output.series_results[0].outlier_intervals.intervals);
assert_eq!(intervals, &got_intervals, "case {} failed", tc.name);
}
Err(exp) => {
assert!(result.is_err(), "case {} failed", tc.name);
Expand Down
16 changes: 16 additions & 0 deletions crates/augurs-outlier/src/testing.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::OutlierInterval;

pub const SERIES: &[&[f64]] = &[
&[
84.57766308278907,
Expand Down Expand Up @@ -873,3 +875,17 @@ pub const SERIES: &[&[f64]] = &[
90.21207715366646,
],
];

/// Flatten a slice of [`OutlierInterval`]s into a flat list of indices.
///
/// Each interval contributes its `start` index followed by its `end` index
/// when one is present; an open-ended interval (`end == None`) contributes
/// only its `start`. The order of the input intervals is preserved.
pub(crate) fn flatten_intervals(intervals: &[OutlierInterval]) -> Vec<usize> {
    intervals
        .iter()
        // `Option<usize>` is itself an iterator of zero or one items, so
        // chaining it after `start` avoids allocating a temporary `Vec`
        // for every interval.
        .flat_map(|interval| std::iter::once(interval.start).chain(interval.end))
        .collect()
}

0 comments on commit d46c954

Please sign in to comment.