Skip to content

Commit 20df95c

Browse files
committed
Rework multi-bucket features to better encapsulate functionality and support additional features in future
1 parent c32772a commit 20df95c

25 files changed

+1056
-467
lines changed

include/maths/CTimeSeriesModel.h

Lines changed: 24 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class CPrior;
2626
class CTimeSeriesDecompositionInterface;
2727
class CTimeSeriesAnomalyModel;
2828
class CUnivariateTimeSeriesChangeDetector;
29+
template<typename>
30+
class CTimeSeriesMultibucketFeature;
2931
struct SChangeDescription;
3032
struct SDistributionRestoreParams;
3133
struct SModelRestoreParams;
@@ -57,11 +59,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
5759
using TDoubleWeightsAry = maths_t::TDoubleWeightsAry;
5860
using TDecompositionPtr = std::shared_ptr<CTimeSeriesDecompositionInterface>;
5961
using TDecayRateController2Ary = boost::array<CDecayRateController, 2>;
60-
61-
public:
62-
//! The default length of the sliding window of residuals used to compute
63-
//! multibucket features.
64-
static const std::size_t MULTIBUCKET_FEATURES_WINDOW_LENGTH;
62+
using TMultibucketFeature = CTimeSeriesMultibucketFeature<double>;
6563

6664
public:
6765
//! \param[in] params The model parameters.
@@ -70,20 +68,18 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
7068
//! \param[in] residualModel The prior for the time series residual model.
7169
//! \param[in] controllers Optional decay rate controllers for the trend
7270
//! and residual model.
71+
//! \param[in] multibucketFeature The multi-bucket feature to analyse, if any.
7372
//! \param[in] modelAnomalies If true we use a separate model to capture
7473
//! the characteristics of anomalous time periods.
75-
//! \param[in] multibucketFeaturesWindowLength The length of the sliding window
76-
//! of residuals used to compute multibucket features.
7774
CUnivariateTimeSeriesModel(const CModelParams& params,
7875
std::size_t id,
7976
const CTimeSeriesDecompositionInterface& trendModel,
8077
const CPrior& residualModel,
8178
const TDecayRateController2Ary* controllers = nullptr,
82-
bool modelAnomalies = true,
83-
std::size_t multibucketFeaturesWindowLength = MULTIBUCKET_FEATURES_WINDOW_LENGTH);
79+
const TMultibucketFeature* multibucketFeature = nullptr,
80+
bool modelAnomalies = true);
8481
CUnivariateTimeSeriesModel(const SModelRestoreParams& params,
85-
core::CStateRestoreTraverser& traverser,
86-
std::size_t multibucketFeaturesWindowLength = MULTIBUCKET_FEATURES_WINDOW_LENGTH);
82+
core::CStateRestoreTraverser& traverser);
8783
~CUnivariateTimeSeriesModel();
8884

8985
const CUnivariateTimeSeriesModel& operator=(const CUnivariateTimeSeriesModel&) = delete;
@@ -203,9 +199,6 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
203199

204200
//! Get the residual model.
205201
const CPrior& residualModel() const;
206-
207-
//! Get the sliding window mean residual model.
208-
const CPrior* residualMeanModel() const;
209202
//@}
210203

211204
private:
@@ -215,9 +208,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
215208
using TDouble1VecDoubleWeightsAry1VecPr =
216209
std::pair<TDouble1Vec, maths_t::TDoubleWeightsAry1Vec>;
217210
using TDouble2VecWeightsAryVec = std::vector<TDouble2VecWeightsAry>;
218-
using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
219-
using TTimeFloatMeanAccumulatorPr = std::pair<core_t::TTime, TFloatMeanAccumulator>;
220-
using TTimeFloatMeanAccumulatorPrCBuf = boost::circular_buffer<TTimeFloatMeanAccumulatorPr>;
211+
using TMultibucketFeaturePtr = std::unique_ptr<TMultibucketFeature>;
221212
using TDecayRateController2AryPtr = std::unique_ptr<TDecayRateController2Ary>;
222213
using TPriorPtr = std::shared_ptr<CPrior>;
223214
using TAnomalyModelPtr = std::unique_ptr<CTimeSeriesAnomalyModel>;
@@ -269,9 +260,6 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
269260
const TDouble2Vec1Vec& value,
270261
SModelProbabilityResult& result) const;
271262

272-
//! Get the sliding window mean residual.
273-
TDouble1VecDoubleWeightsAry1VecPr residualMean() const;
274-
275263
//! Get the models for the correlations and the models of the correlated
276264
//! time series.
277265
bool correlationModels(TSize1Vec& correlated,
@@ -301,19 +289,16 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel {
301289
//! \note This can temporarily be shared with the change detector.
302290
TDecompositionPtr m_TrendModel;
303291

304-
//! A sliding window of the most recent prediction residuals.
305-
TTimeFloatMeanAccumulatorPrCBuf m_RecentResiduals;
306-
307292
//! The time series' residual model.
308293
//!
309294
//! \note This can temporarily be shared with the change detector.
310295
TPriorPtr m_ResidualModel;
311296

312-
//! A model of the mean of the recent residuals.
313-
//!
314-
//! This models a feature constructed from the mean of residuals in
315-
//! a sliding window.
316-
TPriorPtr m_ResidualMeanModel;
297+
//! The multi-bucket feature to use.
298+
TMultibucketFeaturePtr m_MultibucketFeature;
299+
300+
//! A model of the multi-bucket feature.
301+
TPriorPtr m_MultibucketFeatureModel;
317302

318303
//! A model for time periods when the basic model can't predict the
319304
//! value of the time series.
@@ -554,32 +539,26 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
554539
using TDecompositionPtr = std::shared_ptr<CTimeSeriesDecompositionInterface>;
555540
using TDecompositionPtr10Vec = core::CSmallVector<TDecompositionPtr, 10>;
556541
using TDecayRateController2Ary = boost::array<CDecayRateController, 2>;
557-
558-
public:
559-
//! The default length of the sliding window of residuals used to compute
560-
//! multibucket features.
561-
static const std::size_t MULTIBUCKET_FEATURES_WINDOW_LENGTH;
542+
using TMultibucketFeature = CTimeSeriesMultibucketFeature<TDouble10Vec>;
562543

563544
public:
564545
//! \param[in] params The model parameters.
565546
//! \param[in] trendModel The time series trend decomposition.
566547
//! \param[in] residualModel The prior for the time series residual model.
567548
//! \param[in] controllers Optional decay rate controllers for the trend
568549
//! and residual model.
550+
//! \param[in] multibucketFeature The multi-bucket feature to analyse, if any.
569551
//! \param[in] modelAnomalies If true we use a separate model to capture
570552
//! the characteristics of anomalous time periods.
571-
//! \param[in] multibucketFeaturesWindowLength The length of the sliding window
572-
//! of residuals used to compute multibucket features.
573553
CMultivariateTimeSeriesModel(const CModelParams& params,
574554
const CTimeSeriesDecompositionInterface& trendModel,
575555
const CMultivariatePrior& residualModel,
576556
const TDecayRateController2Ary* controllers = nullptr,
577-
bool modelAnomalies = true,
578-
std::size_t multibucketFeaturesWindowLength = MULTIBUCKET_FEATURES_WINDOW_LENGTH);
557+
const TMultibucketFeature* multibucketFeature = nullptr,
558+
bool modelAnomalies = true);
579559
CMultivariateTimeSeriesModel(const CMultivariateTimeSeriesModel& other);
580560
CMultivariateTimeSeriesModel(const SModelRestoreParams& params,
581-
core::CStateRestoreTraverser& traverser,
582-
std::size_t multibucketFeaturesWindowLength = MULTIBUCKET_FEATURES_WINDOW_LENGTH);
561+
core::CStateRestoreTraverser& traverser);
583562
~CMultivariateTimeSeriesModel();
584563

585564
const CMultivariateTimeSeriesModel& operator=(const CMultivariateTimeSeriesModel&) = delete;
@@ -696,9 +675,6 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
696675

697676
//! Get the residual model.
698677
const CMultivariatePrior& residualModel() const;
699-
700-
//! Get the sliding window mean residual model.
701-
const CMultivariatePrior* residualMeanModel() const;
702678
//@}
703679

704680
private:
@@ -711,6 +687,7 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
711687
using TVectorMeanAccumulator = CBasicStatistics::SSampleMean<TVector>::TAccumulator;
712688
using TTimeVectorMeanAccumulatorPr = std::pair<core_t::TTime, TVectorMeanAccumulator>;
713689
using TTimeVectorMeanAccumulatorPrCBuf = boost::circular_buffer<TTimeVectorMeanAccumulatorPr>;
690+
using TMultibucketFeaturePtr = std::unique_ptr<TMultibucketFeature>;
714691
using TDecayRateController2AryPtr = std::unique_ptr<TDecayRateController2Ary>;
715692
using TMultivariatePriorPtr = std::unique_ptr<CMultivariatePrior>;
716693
using TAnomalyModelPtr = std::unique_ptr<CTimeSeriesAnomalyModel>;
@@ -735,9 +712,6 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
735712
//! decomposition.
736713
void reinitializeStateGivenNewComponent();
737714

738-
//! Get the sliding window mean residual.
739-
TDouble10Vec1VecDouble10VecWeightsAry1VecPr residualMean() const;
740-
741715
//! Get the model dimension.
742716
std::size_t dimension() const;
743717

@@ -755,17 +729,14 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel {
755729
//! The time series trend decomposition.
756730
TDecompositionPtr10Vec m_TrendModel;
757731

758-
//! A sliding window of the most recent prediction residuals.
759-
TTimeVectorMeanAccumulatorPrCBuf m_RecentResiduals;
760-
761732
//! The time series' residual model.
762733
TMultivariatePriorPtr m_ResidualModel;
763734

764-
//! A model of the mean of the recent residuals.
765-
//!
766-
//! This models a feature constructed from the mean of residuals in
767-
//! a sliding window.
768-
TMultivariatePriorPtr m_ResidualMeanModel;
735+
//! The multi-bucket feature to use.
736+
TMultibucketFeaturePtr m_MultibucketFeature;
737+
738+
//! A model of the multi-bucket feature.
739+
TMultivariatePriorPtr m_MultibucketFeatureModel;
769740

770741
//! A model for time periods when the basic model can't predict the
771742
//! value of the time series.
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#ifndef INCLUDED_ml_maths_CTimeSeriesMultibucketFeatureSerialiser_h
8+
#define INCLUDED_ml_maths_CTimeSeriesMultibucketFeatureSerialiser_h
9+
10+
#include <maths/ImportExport.h>
11+
12+
#include <memory>
13+
14+
namespace ml {
15+
namespace core {
16+
template<typename, std::size_t>
17+
class CSmallVector;
18+
class CStatePersistInserter;
19+
class CStateRestoreTraverser;
20+
}
21+
namespace maths {
22+
template<typename>
23+
class CTimeSeriesMultibucketFeature;
24+
struct SModelRestoreParams;
25+
26+
//! \brief Reflection for CTimeSeriesMultibucketFeature sub-classes.
27+
//!
28+
//! DESCRIPTION:\n
29+
//! Encapsulate the conversion of arbitrary CTimeSeriesMultibucketFeature
30+
//! sub-classes to/from textual state. In particular, the field name
31+
//! associated with each type of feature is then in one file.
32+
//!
33+
//! IMPLEMENTATION DECISIONS:\n
34+
//! The serialisation format must be a hierarchical format that supports
35+
//! name/value pairs where the value may be a nested set of name/value
36+
//! pairs. Text format is used to make it easier to provide backwards
37+
//! compatibility in the future as the classes evolve.
38+
class MATHS_EXPORT CTimeSeriesMultibucketFeatureSerialiser {
39+
public:
40+
using TDouble10Vec = core::CSmallVector<double, 10>;
41+
using TUnivariateFeature = CTimeSeriesMultibucketFeature<double>;
42+
using TMultivariateFeature = CTimeSeriesMultibucketFeature<TDouble10Vec>;
43+
using TUnivariateFeaturePtr = std::unique_ptr<TUnivariateFeature>;
44+
using TMultivariateFeaturePtr = std::unique_ptr<TMultivariateFeature>;
45+
46+
public:
47+
//! Construct the appropriate CTimeSeriesMultibucketFeature sub-class
48+
//! from its state document representation. Sets \p result to NULL on
49+
//! failure.
50+
bool operator()(TUnivariateFeaturePtr& result, core::CStateRestoreTraverser& traverser) const;
51+
52+
//! Construct the appropriate CTimeSeriesMultibucketFeature sub-class
53+
//! from its state document representation. Sets \p result to NULL on
54+
//! failure.
55+
bool operator()(TMultivariateFeaturePtr& result,
56+
core::CStateRestoreTraverser& traverser) const;
57+
58+
//! Persist \p feature by passing information to the supplied inserter.
59+
void operator()(const TUnivariateFeaturePtr& feature,
60+
core::CStatePersistInserter& inserter) const;
61+
62+
//! Persist \p feature by passing information to the supplied inserter.
63+
void operator()(const TMultivariateFeaturePtr& feature,
64+
core::CStatePersistInserter& inserter) const;
65+
};
66+
}
67+
}
68+
69+
#endif // INCLUDED_ml_maths_CTimeSeriesMultibucketFeatureSerialiser_h

0 commit comments

Comments
 (0)