From 1040889181e7345af7bcab88a889e3551b435215 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 25 May 2018 14:26:38 +0100 Subject: [PATCH] [ML] Improve change point detection for long bucket lengths (#95) --- include/maths/CBasicStatistics.h | 7 +- include/maths/CTimeSeriesChangeDetector.h | 24 ++- .../maths/CTimeSeriesDecompositionDetail.h | 12 +- lib/maths/CAdaptiveBucketing.cc | 10 +- lib/maths/CBasicStatistics.cc | 70 +++++--- lib/maths/CModel.cc | 6 +- lib/maths/CPeriodicityHypothesisTests.cc | 8 +- lib/maths/CTimeSeriesChangeDetector.cc | 116 ++++++------ lib/maths/CTimeSeriesDecomposition.cc | 12 +- lib/maths/CTimeSeriesDecompositionDetail.cc | 32 +++- lib/maths/CTimeSeriesModel.cc | 15 +- lib/maths/unittest/CBasicStatisticsTest.cc | 44 ++++- lib/maths/unittest/CBasicStatisticsTest.h | 1 + .../unittest/CTimeSeriesChangeDetectorTest.cc | 25 ++- .../unittest/CTimeSeriesChangeDetectorTest.h | 3 +- lib/maths/unittest/CTimeSeriesModelTest.cc | 168 ++++++++---------- 16 files changed, 323 insertions(+), 230 deletions(-) diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h index 25b0405ae2..dfcf624fe8 100644 --- a/include/maths/CBasicStatistics.h +++ b/include/maths/CBasicStatistics.h @@ -67,10 +67,13 @@ class MATHS_EXPORT CBasicStatistics { } //! Compute the sample mean. - static double mean(const TDoubleVec& sample); + static double mean(const TDoubleVec& data); //! Compute the sample median. - static double median(const TDoubleVec& dataIn); + static double median(const TDoubleVec& data); + + //! Compute the median absolute deviation. + static double mad(const TDoubleVec& data); //! Compute the maximum of \p first, \p second and \p third. template diff --git a/include/maths/CTimeSeriesChangeDetector.h b/include/maths/CTimeSeriesChangeDetector.h index c249636d83..2cccd135ec 100644 --- a/include/maths/CTimeSeriesChangeDetector.h +++ b/include/maths/CTimeSeriesChangeDetector.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -90,11 +91,17 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { //! if there has been. TOptionalChangeDescription change(); - //! The function used to decide whether to accept a change. - //! A change is accepted at a value of 1.0 for this function. + //! Get an rough estimate of the chance that the change will + //! eventually be accepted. + double probabilityWillAccept() const; + + //! Evaluate the function used to decide whether to accept + //! a change. + //! + //! A change is accepted for values >= 1.0. //! - //! \param[out] change Filled in with the index of the change - //! the most likely change. + //! \param[out] change Filled in with the index of the most + //! likely change. double decisionFunction(std::size_t& change) const; //! Add \p samples to the change detector. @@ -117,6 +124,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { using TChangeModelPtr = std::shared_ptr; using TChangeModelPtr5Vec = core::CSmallVector; using TMinMaxAccumulator = CBasicStatistics::CMinMax; + using TRegression = CRegression::CLeastSquaresOnline<1, double>; private: //! The minimum amount of time we need to observe before @@ -135,8 +143,12 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector { //! The count of samples added to the change models. std::size_t m_SampleCount; - //! The current evidence of a change. - double m_CurrentEvidenceOfChange; + //! The current value of the decision function. + double m_DecisionFunction; + + //! A least squares fit to the log of the inverse decision + //! function as a function of time. + TRegression m_LogInvDecisionFunctionTrend; //! The change models. TChangeModelPtr5Vec m_ChangeModels; diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index 37545ecbd9..34aac7e1c6 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -202,8 +202,16 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Test to see whether any seasonal components are present. void test(const SAddValue& message); - //! Clear the test identified by \p test. - void clear(ETest test, core_t::TTime time); + //! Clear the test if the shift is large compared to the median + //! absolute deviation in the window. + //! + //! There is no point in continuing to use the historical window + //! if the signal has changed significantly w.r.t. the possible + //! magnitude of any seasonal component. Çonversely, if we detect + //! a small change we don't want to throw a lot of history: since, + //! depending on the false positive rate, we may never accumulate + //! enough history to detect long seasonal components. + void maybeClear(core_t::TTime time, double shift); //! Age the test to account for the interval \p end - \p start //! elapsed time. diff --git a/lib/maths/CAdaptiveBucketing.cc b/lib/maths/CAdaptiveBucketing.cc index 74f6040889..3e7ed9e58a 100644 --- a/lib/maths/CAdaptiveBucketing.cc +++ b/lib/maths/CAdaptiveBucketing.cc @@ -290,11 +290,11 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { // positions. Once they have stabilized on their desired location // for the trend, we are able to detect this by comparing the // time averaged desired displacement and the absolute desired - // displacement. The lower the ratio the smaller more smoothing - // we apply. Note we want to damp the noise out because the - // process of adjusting the buckets end points loses a small - // amount of information, see the comments at the start of - // refresh for more details. + // displacement. The lower the ratio the more smoothing we apply. + // Note we want to damp the noise out because the process of + // adjusting the buckets end points loses a small amount of + // information, see the comments at the start of refresh for + // more details. double alpha{ ALPHA * (CBasicStatistics::mean(m_MeanAbsDesiredDisplacement) == 0.0 ? 1.0 diff --git a/lib/maths/CBasicStatistics.cc b/lib/maths/CBasicStatistics.cc index 4e3675208f..c0044d8719 100644 --- a/lib/maths/CBasicStatistics.cc +++ b/lib/maths/CBasicStatistics.cc @@ -13,36 +13,19 @@ namespace ml { namespace maths { +namespace { -double CBasicStatistics::mean(const TDoubleDoublePr& samples) { - return 0.5 * (samples.first + samples.second); -} - -double CBasicStatistics::mean(const TDoubleVec& sample) { - return std::accumulate(sample.begin(), sample.end(), 0.0) / - static_cast(sample.size()); -} +//! Compute the median reordering \p samples in the process. +double medianInPlace(std::vector& data) { + std::size_t size{data.size()}; -double CBasicStatistics::median(const TDoubleVec& dataIn) { - if (dataIn.empty()) { - return 0.0; - } - - std::size_t size{dataIn.size()}; - if (size == 1) { - return dataIn[0]; - } - - TDoubleVec data{dataIn}; - - // If data size is even (1,2,3,4) then take mean of 2,3 = 2.5 - // If data size is odd (1,2,3,4,5) then take middle value = 3 - double median{0.0}; + // If sample size is even (1,2,3,4) then take mean of 2,3 = 2.5 + // If sample size is odd (1,2,3,4,5) then take middle value = 3 + bool useMean{size % 2 == 0}; // For an odd number of elements, this will get the median element into // place. For an even number of elements, it will get the second element // of the middle pair into place. - bool useMean{size % 2 == 0}; size_t index{size / 2}; std::nth_element(data.begin(), data.begin() + index, data.end()); @@ -52,12 +35,43 @@ double CBasicStatistics::median(const TDoubleVec& dataIn) { // before the nth one in the vector. auto left = std::max_element(data.begin(), data.begin() + index); - median = (*left + data[index]) / 2.0; - } else { - median = data[index]; + return (*left + data[index]) / 2.0; + } + + return data[index]; +} +} + +double CBasicStatistics::mean(const TDoubleDoublePr& data) { + return 0.5 * (data.first + data.second); +} + +double CBasicStatistics::mean(const TDoubleVec& data) { + return std::accumulate(data.begin(), data.end(), 0.0) / + static_cast(data.size()); +} + +double CBasicStatistics::median(const TDoubleVec& data_) { + if (data_.empty()) { + return 0.0; + } + if (data_.size() == 1) { + return data_[0]; } + TDoubleVec data{data_}; + return medianInPlace(data); +} - return median; +double CBasicStatistics::mad(const TDoubleVec& data_) { + if (data_.size() < 2) { + return 0.0; + } + TDoubleVec data{data_}; + double median{medianInPlace(data)}; + for (auto& datum : data) { + datum = std::fabs(datum - median); + } + return medianInPlace(data); } const char CBasicStatistics::INTERNAL_DELIMITER(':'); diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index 5fd2ab1d2d..afaffe6ea0 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -74,8 +74,8 @@ CModelParams::CModelParams(core_t::TTime bucketLength, core_t::TTime maximumTimeToTestForChange) : m_BucketLength(bucketLength), m_LearnRate(learnRate), m_DecayRate(decayRate), m_MinimumSeasonalVarianceScale(minimumSeasonalVarianceScale), - m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 12 * bucketLength)), - m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 48 * bucketLength)), + m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 6 * bucketLength)), + m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 12 * bucketLength)), m_ProbabilityBucketEmpty(0.0) { } @@ -100,7 +100,7 @@ double CModelParams::minimumSeasonalVarianceScale() const { } bool CModelParams::testForChange(core_t::TTime changeInterval) const { - return changeInterval >= std::max(3 * m_BucketLength, 10 * core::constants::MINUTE); + return changeInterval >= std::max(3 * m_BucketLength, core::constants::HOUR); } core_t::TTime CModelParams::minimumTimeToDetectChange(void) const { diff --git a/lib/maths/CPeriodicityHypothesisTests.cc b/lib/maths/CPeriodicityHypothesisTests.cc index 8cadc39e36..225b09b441 100644 --- a/lib/maths/CPeriodicityHypothesisTests.cc +++ b/lib/maths/CPeriodicityHypothesisTests.cc @@ -1434,7 +1434,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows, } double scale{1.0 / stats.s_M}; - LOG_TRACE(<< "scale = " << scale); + LOG_TRACE(<< " scale = " << scale); double v{residualVariance(trend, scale)}; v = varianceAtPercentile(v, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0); @@ -1503,12 +1503,10 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows, std::size_t n{static_cast( std::ceil(Rt * static_cast(windowLength / period_)))}; double at{stats.s_At * std::sqrt(v0 / scale)}; - LOG_TRACE(<< " n = " << n << ", at = " << at << ", v = " << v); + LOG_TRACE(<< " n = " << n << ", at = " << at << ", v = " << v); TMeanAccumulator level; for (const auto& value : values) { - if (CBasicStatistics::count(value) > 0.0) { - level.add(CBasicStatistics::mean(value)); - } + level.add(CBasicStatistics::mean(value), CBasicStatistics::count(value)); } TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)}); periodicTrend(values, window, m_BucketLength, amplitudes); diff --git a/lib/maths/CTimeSeriesChangeDetector.cc b/lib/maths/CTimeSeriesChangeDetector.cc index 1265e34326..33b5e088fb 100644 --- a/lib/maths/CTimeSeriesChangeDetector.cc +++ b/lib/maths/CTimeSeriesChangeDetector.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -37,12 +38,12 @@ using namespace time_series_change_detector_detail; namespace { using TDouble1Vec = core::CSmallVector; using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription; -const std::string MINIMUM_TIME_TO_DETECT{"a"}; -const std::string MAXIMUM_TIME_TO_DETECT{"b"}; -const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"}; +const std::string MINIMUM_TIME_TO_DETECT_TAG{"a"}; +const std::string MAXIMUM_TIME_TO_DETECT_TAG{"b"}; +const std::string MINIMUM_DELTA_BIC_TO_DETECT_TAG{"c"}; const std::string RESIDUAL_MODEL_MODE_TAG{"d"}; const std::string SAMPLE_COUNT_TAG{"e"}; -const std::string CURRENT_EVIDENCE_OF_CHANGE{"f"}; +const std::string DECISION_FUNCTION_TAG{"f"}; const std::string MIN_TIME_TAG{"g"}; const std::string MAX_TIME_TAG{"h"}; const std::string CHANGE_MODEL_TAG{"i"}; @@ -51,12 +52,15 @@ const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"}; const std::string SHIFT_TAG{"l"}; const std::string SCALE_TAG{"m"}; const std::string RESIDUAL_MODEL_TAG{"n"}; +const std::string LOG_INVERSE_DECISION_FUNCTION_TREND_TAG{"p"}; const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u}; const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9}; -const std::size_t COUNT_TO_INITIALIZE{5u}; +const std::size_t COUNT_TO_INITIALIZE{3u}; const double MINIMUM_SCALE{0.1}; const double MAXIMUM_SCALE{10.0}; const double WINSORISATION_DERATE{1.0}; +const double MAXIMUM_DECISION_FUNCTION{32.0}; +const double LOG_INV_MAXIMUM_DECISION_FUNCTION{-CTools::fastLog(MAXIMUM_DECISION_FUNCTION)}; } SChangeDescription::SChangeDescription(EDescription description, double value, const TPriorPtr& residualModel) @@ -86,7 +90,7 @@ CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector( core_t::TTime maximumTimeToDetect, double minimumDeltaBicToDetect) : m_MinimumTimeToDetect{minimumTimeToDetect}, m_MaximumTimeToDetect{maximumTimeToDetect}, - m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, m_SampleCount{0}, m_CurrentEvidenceOfChange{0.0} { + m_MinimumDeltaBicToDetect{minimumDeltaBicToDetect}, m_SampleCount{0}, m_DecisionFunction{0.0} { m_ChangeModels.push_back( std::make_shared(trendModel, residualModel)); m_ChangeModels.push_back( @@ -95,7 +99,6 @@ CUnivariateTimeSeriesChangeDetector::CUnivariateTimeSeriesChangeDetector( trendModel, residualModel, -core::constants::HOUR)); m_ChangeModels.push_back(std::make_shared( trendModel, residualModel, +core::constants::HOUR)); - if (trendModel->seasonalComponents().size() > 0) { m_ChangeModels.push_back(std::make_shared( trendModel, residualModel)); @@ -108,11 +111,14 @@ bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser( auto model = m_ChangeModels.begin(); do { const std::string name{traverser.name()}; - RESTORE_BUILT_IN(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect) - RESTORE_BUILT_IN(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect) - RESTORE_BUILT_IN(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect) + RESTORE_BUILT_IN(MINIMUM_TIME_TO_DETECT_TAG, m_MinimumTimeToDetect) + RESTORE_BUILT_IN(MAXIMUM_TIME_TO_DETECT_TAG, m_MaximumTimeToDetect) + RESTORE_BUILT_IN(MINIMUM_DELTA_BIC_TO_DETECT_TAG, m_MinimumDeltaBicToDetect) RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount) - RESTORE_BUILT_IN(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange) + RESTORE_BUILT_IN(DECISION_FUNCTION_TAG, m_DecisionFunction) + RESTORE(LOG_INVERSE_DECISION_FUNCTION_TREND_TAG, + traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, + &m_LogInvDecisionFunctionTrend, _1))) RESTORE_SETUP_TEARDOWN(MIN_TIME_TAG, core_t::TTime time, core::CStringUtils::stringToType(traverser.value(), time), m_TimeRange.add(time)) @@ -127,13 +133,16 @@ bool CUnivariateTimeSeriesChangeDetector::acceptRestoreTraverser( } void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertValue(MINIMUM_TIME_TO_DETECT, m_MinimumTimeToDetect); - inserter.insertValue(MAXIMUM_TIME_TO_DETECT, m_MaximumTimeToDetect); - inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT, m_MinimumDeltaBicToDetect, + inserter.insertValue(MINIMUM_TIME_TO_DETECT_TAG, m_MinimumTimeToDetect); + inserter.insertValue(MAXIMUM_TIME_TO_DETECT_TAG, m_MaximumTimeToDetect); + inserter.insertValue(MINIMUM_DELTA_BIC_TO_DETECT_TAG, m_MinimumDeltaBicToDetect, core::CIEEE754::E_SinglePrecision); inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount); - inserter.insertValue(CURRENT_EVIDENCE_OF_CHANGE, m_CurrentEvidenceOfChange, + inserter.insertValue(DECISION_FUNCTION_TAG, m_DecisionFunction, core::CIEEE754::E_SinglePrecision); + inserter.insertLevel(LOG_INVERSE_DECISION_FUNCTION_TREND_TAG, + boost::bind(&TRegression::acceptPersistInserter, + &m_LogInvDecisionFunctionTrend, _1)); if (m_TimeRange.initialized()) { inserter.insertValue(MIN_TIME_TAG, m_TimeRange.min()); inserter.insertValue(MAX_TIME_TAG, m_TimeRange.max()); @@ -145,18 +154,25 @@ void CUnivariateTimeSeriesChangeDetector::acceptPersistInserter(core::CStatePers } TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change() { - if (m_TimeRange.range() > m_MinimumTimeToDetect) { - std::size_t candidate{}; - double p{this->decisionFunction(candidate)}; - - if (p > 1.0) { - return m_ChangeModels[candidate]->change(); - } - - m_CurrentEvidenceOfChange = m_ChangeModels[0]->bic() - - m_ChangeModels[candidate]->bic(); + std::size_t best{}; + m_DecisionFunction = this->decisionFunction(best); + if (m_DecisionFunction > 0.0) { + double x{static_cast(m_TimeRange.range()) / + static_cast(m_MaximumTimeToDetect)}; + double y{CTools::fastLog(1.0 / m_DecisionFunction)}; + m_LogInvDecisionFunctionTrend.add(x, y); } - return TOptionalChangeDescription(); + if (m_TimeRange.range() > m_MinimumTimeToDetect && m_DecisionFunction > 1.0) { + return m_ChangeModels[best]->change(); + } + return TOptionalChangeDescription{}; +} + +double CUnivariateTimeSeriesChangeDetector::probabilityWillAccept() const { + double prediction{std::exp(-std::max(m_LogInvDecisionFunctionTrend.predict(1.0), + LOG_INV_MAXIMUM_DECISION_FUNCTION))}; + return CTools::logisticFunction(std::max(m_DecisionFunction, prediction), + 0.1, 1.0, -1.0); } double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change) const { @@ -180,40 +196,37 @@ double CUnivariateTimeSeriesChangeDetector::decisionFunction(std::size_t& change noChangeBic - candidates[1].first}; double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()}; - double x[]{evidences[0] / m_MinimumDeltaBicToDetect, - 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, - evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, - static_cast(m_TimeRange.range() - m_MinimumTimeToDetect) / - static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; + double x[]{ + evidences[0] / m_MinimumDeltaBicToDetect, + 2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect, + evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence, + std::max(static_cast(m_TimeRange.range() - m_MinimumTimeToDetect), 0.0) / + static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect)}; double p{CTools::logisticFunction(x[0], 0.05, 1.0) * CTools::logisticFunction(x[1], 0.1, 1.0) * (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0)) * CTools::logisticFunction(x[3], 0.2, 0.5)}; - LOG_TRACE("p(" << (*candidates[0].second)->change()->print() << ") = " << p - << " | x = " << core::CContainerPrinter::print(x)); + LOG_TRACE(<< "df(" << (*candidates[0].second)->change()->print() + << ") = " << MAXIMUM_DECISION_FUNCTION * p + << " | x = " << core::CContainerPrinter::print(x)); change = candidates[0].second - m_ChangeModels.begin(); - // Note 0.03125 = 0.5^5. This is chosen so that this function - // is equal to one when each of the decision criteria are at - // the centre of the sigmoid functions and the time range is - // equal to "minimum time to detect". This means we'll (just) - // accept the change if all of the individual hard decision - // criteria are satisfied. + // Note the maximum decision function value is chosen so that + // this function is equal to one when each of the decision + // criteria are at the centre of the sigmoid functions and + // the time range is equal to "minimum time to detect". This + // means we'll (just) accept the change if each "hard" decision + // criterion is individually satisfied. - return p / 0.03125; + return MAXIMUM_DECISION_FUNCTION * p; } bool CUnivariateTimeSeriesChangeDetector::stopTesting() const { core_t::TTime range{m_TimeRange.range()}; - if (range > m_MinimumTimeToDetect) { - double scale{0.5 + CTools::logisticFunction(2.0 * m_CurrentEvidenceOfChange / m_MinimumDeltaBicToDetect, - 0.2, 1.0)}; - return static_cast(range) > - m_MinimumTimeToDetect + - scale * static_cast(m_MaximumTimeToDetect - m_MinimumTimeToDetect); - } - return false; + return (range > m_MinimumTimeToDetect) && + (range > m_MaximumTimeToDetect || m_LogInvDecisionFunctionTrend.count() == 0.0 || + m_LogInvDecisionFunctionTrend.predict(1.0) > 2.0); } void CUnivariateTimeSeriesChangeDetector::addSamples(const TTimeDoublePr1Vec& samples, @@ -243,7 +256,8 @@ uint64_t CUnivariateTimeSeriesChangeDetector::checksum(uint64_t seed) const { seed = CChecksum::calculate(seed, m_MinimumDeltaBicToDetect); seed = CChecksum::calculate(seed, m_TimeRange); seed = CChecksum::calculate(seed, m_SampleCount); - seed = CChecksum::calculate(seed, m_CurrentEvidenceOfChange); + seed = CChecksum::calculate(seed, m_DecisionFunction); + seed = CChecksum::calculate(seed, m_LogInvDecisionFunctionTrend); return CChecksum::calculate(seed, m_ChangeModels); } @@ -662,12 +676,8 @@ void CUnivariateTimeShiftModel::addSamples(const std::size_t count, for (std::size_t i = 0u; i < samples_.size(); ++i) { core_t::TTime time{samples_[i].first}; double value{samples_[i].second}; - double seasonalScale{maths_t::seasonalVarianceScale(weights[i])}; double sample{this->trendModel().detrend(time + m_Shift, value, 0.0)}; - double weight{winsorisation::tailWeight( - residualModel, WINSORISATION_DERATE, seasonalScale, sample)}; samples.push_back(sample); - maths_t::setWinsorisationWeight(weight, weights[i]); } residualModel.addSamples(samples, weights); diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index dbd352d3a4..77b93b5558 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -250,14 +250,18 @@ bool CTimeSeriesDecomposition::applyChange(core_t::TTime time, m_Components.useTrendForPrediction(); switch (change.s_Description) { - case SChangeDescription::E_LevelShift: + case SChangeDescription::E_LevelShift: { + double meanShift{std::fabs(change.s_Value[0])}; + m_PeriodicityTest.maybeClear(time, meanShift); m_Components.shiftLevel(time, value, change.s_Value[0]); - m_PeriodicityTest.clear(CPeriodicityTest::E_Short, time); break; - case SChangeDescription::E_LinearScale: + } + case SChangeDescription::E_LinearScale: { + double meanShift{std::fabs(change.s_Value[0] * this->meanValue(time))}; + m_PeriodicityTest.maybeClear(time, meanShift); m_Components.linearScale(time, change.s_Value[0]); - m_PeriodicityTest.clear(CPeriodicityTest::E_Short, time); break; + } case SChangeDescription::E_TimeShift: m_TimeShift += static_cast(change.s_Value[0]); break; diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index 053bea1179..03eee8b95c 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -70,14 +70,18 @@ using TComponent5Vec = CPeriodicityHypothesisTestsResult::TComponent5Vec; using TSeasonalComponentPtrVec = std::vector; using TCalendarComponentPtrVec = std::vector; -const core_t::TTime DAY = core::constants::DAY; -const core_t::TTime WEEK = core::constants::WEEK; -const core_t::TTime MONTH = 4 * WEEK; +const core_t::TTime DAY{core::constants::DAY}; +const core_t::TTime WEEK{core::constants::WEEK}; +const core_t::TTime MONTH{4 * WEEK}; + +//! Multiplier to correct for bias using MAD to estimate standard +//! deviation (for normally distributed data). +const double MAD_TO_SD_MULTIPLIER{1.4826}; //! We scale the time used for the regression model to improve //! the condition of the design matrix. double scaleTime(core_t::TTime time, core_t::TTime origin) { - return static_cast(time - origin) / static_cast(core::constants::WEEK); + return static_cast(time - origin) / static_cast(WEEK); } //! Get the aging factor to apply for \p dt elapsed time. @@ -598,10 +602,22 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::test(const SAddValue& mes } } -void CTimeSeriesDecompositionDetail::CPeriodicityTest::clear(ETest test, core_t::TTime time) { - if (m_Windows[test] != nullptr) { - m_Windows[test].reset(this->newWindow(test)); - m_Windows[test]->initialize(time); +void CTimeSeriesDecompositionDetail::CPeriodicityTest::maybeClear(core_t::TTime time, + double shift) { + for (auto test : {E_Short, E_Long}) { + if (m_Windows[test] != nullptr) { + TDoubleVec values; + values.reserve(m_Windows[test]->size()); + for (const auto& value : m_Windows[test]->values()) { + if (CBasicStatistics::count(value) > 0.0) { + values.push_back(CBasicStatistics::mean(value)); + } + } + if (shift > MAD_TO_SD_MULTIPLIER * CBasicStatistics::mad(values)) { + m_Windows[test].reset(this->newWindow(test)); + m_Windows[test]->initialize(time); + } + } } } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 9f36b780ad..8bde49c498 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -68,7 +68,7 @@ const double MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT{24.0}; const std::size_t SLIDING_WINDOW_SIZE{12u}; const TSize10Vec NOTHING_TO_MARGINALIZE; const TSizeDoublePr10Vec NOTHING_TO_CONDITION; -const double CHANGE_P_VALUE{1e-5}; +const double CHANGE_P_VALUE{5e-4}; //! Optionally randomly sample from \p indices. TOptionalSize randomlySample(CPRNG::CXorOShiro128Plus& rng, @@ -188,13 +188,8 @@ double pValueFromWeight(double weight) { //! Computes a Winsorisation weight based on the chance that the //! time series is currently undergoing a change. double changeWeight(const TChangeDetectorPtr& detector) { - if (detector != nullptr) { - std::size_t dummy; - return std::max(CTools::logisticFunction(detector->decisionFunction(dummy), - 0.1, 1.0, -1.0), - MINIMUM_WEIGHT); - } - return 1.0; + return detector != nullptr ? std::max(detector->probabilityWillAccept(), MINIMUM_WEIGHT) + : 1.0; } } @@ -1358,6 +1353,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par winsorisation::pValueFromWeight(weight) <= CHANGE_P_VALUE) { m_CurrentChangeInterval += this->params().bucketLength(); if (this->params().testForChange(m_CurrentChangeInterval)) { + LOG_TRACE(<< "Starting to test for change at " << time); m_ChangeDetector = std::make_shared( m_TrendModel, m_ResidualModel, minimumTimeToDetect, maximumTimeToTest); m_CurrentChangeInterval = 0; @@ -1373,8 +1369,7 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par if (m_ChangeDetector->stopTesting()) { m_ChangeDetector.reset(); } else if (auto change = m_ChangeDetector->change()) { - LOG_DEBUG(<< "Detected " << change->print() << " at " - << values[median].first); + LOG_DEBUG(<< "Detected " << change->print() << " at " << time); m_ChangeDetector.reset(); return this->applyChange(*change); } diff --git a/lib/maths/unittest/CBasicStatisticsTest.cc b/lib/maths/unittest/CBasicStatisticsTest.cc index 63d8e115f6..5c70ec9037 100644 --- a/lib/maths/unittest/CBasicStatisticsTest.cc +++ b/lib/maths/unittest/CBasicStatisticsTest.cc @@ -27,6 +27,7 @@ namespace { +using TDoubleVec = std::vector; using TMeanAccumulator = ml::maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = ml::maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using TMeanVarSkewAccumulator = @@ -72,6 +73,8 @@ CppUnit::Test* CBasicStatisticsTest::suite() { &CBasicStatisticsTest::testCovariancesLedoitWolf)); suiteOfTests->addTest(new CppUnit::TestCaller( "CBasicStatisticsTest::testMedian", &CBasicStatisticsTest::testMedian)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBasicStatisticsTest::testMad", &CBasicStatisticsTest::testMad)); suiteOfTests->addTest(new CppUnit::TestCaller( "CBasicStatisticsTest::testOrderStatistics", &CBasicStatisticsTest::testOrderStatistics)); suiteOfTests->addTest(new CppUnit::TestCaller( @@ -93,8 +96,6 @@ void CBasicStatisticsTest::testMean() { } void CBasicStatisticsTest::testCentralMoments() { - using TDoubleVec = std::vector; - LOG_DEBUG(<< "Test mean double"); { double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2}; @@ -703,7 +704,6 @@ void CBasicStatisticsTest::testCentralMoments() { void CBasicStatisticsTest::testVectorCentralMoments() { using TDouble2Vec = ml::core::CSmallVector; - using TDoubleVec = std::vector; { TMeanAccumulator2Vec moments1(2); @@ -958,7 +958,6 @@ void CBasicStatisticsTest::testCovariances() { } void CBasicStatisticsTest::testCovariancesLedoitWolf() { - using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; using TVector2 = ml::maths::CVectorNx1; using TVector2Vec = std::vector; @@ -1092,6 +1091,41 @@ void CBasicStatisticsTest::testMedian() { } } +void CBasicStatisticsTest::testMad() { + using TSizeVec = std::vector; + + // Edge cases 0, 1, 2 elements and > half values equal. + TDoubleVec samples; + samples.assign({5.0}); + CPPUNIT_ASSERT_EQUAL(0.0, ml::maths::CBasicStatistics::mad(samples)); + samples.assign({5.0, 6.0}); + CPPUNIT_ASSERT_EQUAL(0.5, ml::maths::CBasicStatistics::mad(samples)); + samples.assign({6.0, 6.0, 6.0, 2.0, -100.0}); + CPPUNIT_ASSERT_EQUAL(0.0, ml::maths::CBasicStatistics::mad(samples)); + samples.assign({6.0, 6.0, 6.0, 6.0, -100.0, 1.0}); + CPPUNIT_ASSERT_EQUAL(0.0, ml::maths::CBasicStatistics::mad(samples)); + + // Odd/even number of samples. + samples.assign({12.2, 11.8, 1.0, 30.2, 5.9, 209.0, -390.3, 37.0}); + CPPUNIT_ASSERT_DOUBLES_EQUAL(14.6, ml::maths::CBasicStatistics::mad(samples), 1e-15); + samples.assign({12.2, 11.8, 1.0, 30.2, 5.9, 209.0, -390.3, 37.0, 51.2}); + CPPUNIT_ASSERT_DOUBLES_EQUAL(18.0, ml::maths::CBasicStatistics::mad(samples), 1e-15); + + // Random. + ml::test::CRandomNumbers rng; + TSizeVec size; + for (std::size_t test = 0; test < 100; ++test) { + rng.generateUniformSamples(1, 40, 1, size); + rng.generateUniformSamples(0.0, 100.0, size[0], samples); + double mad{ml::maths::CBasicStatistics::mad(samples)}; + double median{ml::maths::CBasicStatistics::median(samples)}; + for (auto& sample : samples) { + sample = std::fabs(sample - median); + } + CPPUNIT_ASSERT_EQUAL(ml::maths::CBasicStatistics::median(samples), mad); + } +} + void CBasicStatisticsTest::testOrderStatistics() { // Test that the order statistics accumulators work for finding min and max // elements of a collection. @@ -1282,8 +1316,6 @@ void CBasicStatisticsTest::testOrderStatistics() { } void CBasicStatisticsTest::testMinMax() { - using TDoubleVec = std::vector; - TDoubleVec positive{1.0, 2.7, 4.0, 0.3, 11.7}; TDoubleVec negative{-3.7, -0.8, -18.2, -0.8}; TDoubleVec mixed{1.3, -8.0, 2.1}; diff --git a/lib/maths/unittest/CBasicStatisticsTest.h b/lib/maths/unittest/CBasicStatisticsTest.h index b1cad731e8..3cb1f906ba 100644 --- a/lib/maths/unittest/CBasicStatisticsTest.h +++ b/lib/maths/unittest/CBasicStatisticsTest.h @@ -16,6 +16,7 @@ class CBasicStatisticsTest : public CppUnit::TestFixture { void testCovariances(); void testCovariancesLedoitWolf(); void testMedian(); + void testMad(); void testOrderStatistics(); void testMinMax(); diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc index c87863fd81..27871b0f7c 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.cc @@ -145,14 +145,16 @@ void CTimeSeriesChangeDetectorTest::testLevelShift() { TGeneratorVec trends{constant, ramp, smoothDaily, weekends, spikeyDaily}; this->testChange( trends, maths::SChangeDescription::E_LevelShift, - [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, 5.0, 16.0); + [](TGenerator trend, core_t::TTime time) { return trend(time) + 0.5; }, + 5.0, 0.0, 15.0); } void CTimeSeriesChangeDetectorTest::testLinearScale() { TGeneratorVec trends{smoothDaily, spikeyDaily}; this->testChange( trends, maths::SChangeDescription::E_LinearScale, - [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, 3.0, 16.0); + [](TGenerator trend, core_t::TTime time) { return 3.0 * trend(time); }, + 3.0, 0.0, 15.0); } void CTimeSeriesChangeDetectorTest::testTimeShift() { @@ -161,12 +163,12 @@ void CTimeSeriesChangeDetectorTest::testTimeShift() { [](TGenerator trend, core_t::TTime time) { return trend(time - core::constants::HOUR); }, - -static_cast(core::constants::HOUR), 24.0); + -static_cast(core::constants::HOUR), 0.05, 23.0); this->testChange(trends, maths::SChangeDescription::E_TimeShift, [](TGenerator trend, core_t::TTime time) { return trend(time + core::constants::HOUR); }, - +static_cast(core::constants::HOUR), 24.0); + +static_cast(core::constants::HOUR), 0.05, 23.0); } void CTimeSeriesChangeDetectorTest::testPersist() { @@ -253,12 +255,14 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, maths::SChangeDescription::EDescription description, TChange applyChange, double expectedChange, - double expectedMeanBucketsToDetectChange) { + double maximumFalseNegatives, + double maximumMeanBucketsToDetectChange) { using TOptionalSize = boost::optional; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; test::CRandomNumbers rng; + double falseNegatives{0.0}; TMeanAccumulator meanBucketsToDetect; TDoubleVec samples; @@ -315,11 +319,16 @@ void CTimeSeriesChangeDetectorTest::testChange(const TGeneratorVec& trends, time += BUCKET_LENGTH; } - CPPUNIT_ASSERT(bucketsToDetect); - meanBucketsToDetect.add(static_cast(*bucketsToDetect)); + if (bucketsToDetect == boost::none) { + falseNegatives += 0.01; + } else { + meanBucketsToDetect.add(static_cast(*bucketsToDetect)); + } } + LOG_DEBUG(<< "false negatives = " << falseNegatives); LOG_DEBUG(<< "buckets to detect = " << maths::CBasicStatistics::mean(meanBucketsToDetect)); + CPPUNIT_ASSERT(falseNegatives <= maximumFalseNegatives); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanBucketsToDetect) < - expectedMeanBucketsToDetectChange); + maximumMeanBucketsToDetectChange); } diff --git a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h index 350cf927cc..bc46b2a631 100644 --- a/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h +++ b/lib/maths/unittest/CTimeSeriesChangeDetectorTest.h @@ -33,7 +33,8 @@ class CTimeSeriesChangeDetectorTest : public CppUnit::TestFixture { ml::maths::SChangeDescription::EDescription description, TChange applyChange, double expectedChange, - double expectedMeanBucketsToDetectChange); + double maximumFalseNegatives, + double maximumMeanBucketsToDetectChange); }; #endif // INCLUDED_CTimeSeriesChangeDetectorTest_h diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index c70fc186ae..dabedade13 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -189,6 +189,62 @@ void reinitializePrior(double learnRate, (*controllers)[1].reset(); } } + +class CChangeDebug { +public: + static const bool ENABLED{false}; + +public: + CChangeDebug(std::string file = "results.m") : m_File(file) { + if (ENABLED) { + m_Actual << "a = ["; + m_ModelBounds << "p = ["; + m_Forecast << "f = ["; + } + } + ~CChangeDebug() { + if (ENABLED) { + std::ofstream file; + file.open(m_File); + file << m_Actual.str() << "];\n"; + file << m_ModelBounds.str() << "];\n"; + file << m_Forecast.str() << "];\n"; + file << "hold on;\n"; + file << "plot(a);\n"; + file << "plot(p);\n"; + file << "plot([rows(a)-rows(f)+1:rows(a)],f);\n"; + } + } + void addValue(double value) { + if (ENABLED) { + m_Actual << value << std::endl; + } + } + void addValueAndPrediction(core_t::TTime time, + double value, + const maths::CUnivariateTimeSeriesModel& model) { + if (ENABLED) { + m_Actual << value << std::endl; + auto x = model.confidenceInterval( + time, 90.0, maths_t::CUnitWeights::unit(1)); + if (x.size() == 3) { + m_ModelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl; + } + } + } + void addForecast(const maths::SErrorBar& errorBar) { + if (ENABLED) { + m_Forecast << errorBar.s_LowerBound << "," << errorBar.s_Predicted + << "," << errorBar.s_UpperBound << std::endl; + } + } + +private: + std::string m_File; + std::ostringstream m_Actual; + std::ostringstream m_ModelBounds; + std::ostringstream m_Forecast; +}; } void CTimeSeriesModelTest::testClone() { @@ -1936,20 +1992,6 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; - //std::ostringstream actual, modelBounds; - //actual << "r = ["; - //modelBounds << "x = ["; - //auto updateTestDebug = [&](core_t::TTime time, double value, - // const maths::CUnivariateTimeSeriesModel &model) - // { - // actual << value << std::endl; - // auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); - // if (x.size() == 3) - // { - // modelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl; - // } - // }; - test::CRandomNumbers rng; LOG_DEBUG("Univariate: Piecwise Constant"); @@ -1960,6 +2002,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; + CChangeDebug debug("piecewise_constant.m"); // Add some data to the model. @@ -1972,7 +2015,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { level, 2.0, 300 + static_cast(2.0 * dl), samples); for (auto sample : samples) { updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } } @@ -1980,7 +2023,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { rng.generateNormalSamples(level, 2.0, 100, samples); for (auto sample : samples) { updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } @@ -1994,29 +2037,20 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { level, 2.0, 300 + static_cast(2.0 * dl), samples); expected.insert(expected.end(), samples.begin(), samples.end()); } - //std::for_each(expected.begin(), expected.end(), - // [&actual](double sample) { actual << sample << std::endl; }); + std::for_each(expected.begin(), expected.end(), + [&debug](double sample) { debug.addValue(sample); }); - //std::ofstream file; - //file.open("forecast.m"); - //file << actual.str() << "];"; - //file << modelBounds.str() << "];"; - //file << "y = ["; TDouble3VecVec forecast; auto pushErrorBar = [&](const maths::SErrorBar& errorBar) { forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted, errorBar.s_UpperBound}); - //file << errorBar.s_LowerBound << "," - // << errorBar.s_Predicted << "," - // << errorBar.s_UpperBound << std::endl; + debug.addForecast(errorBar); }; std::string m; model.forecast(time, time + 800 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m); - //file << "];"; - double outOfBounds{0.0}; for (std::size_t i = 0u; i < forecast.size(); ++i) { CPPUNIT_ASSERT_DOUBLES_EQUAL(expected[i], forecast[i][1], 0.1 * expected[i]); @@ -2036,6 +2070,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { auto controllers = decayRateControllers(1); maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}; + CChangeDebug debug("saw_tooth.m"); // Add some data to the model. @@ -2047,7 +2082,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { while (value < 95.0) { rng.generateNormalSamples(0.0, 2.0, 1, noise); updateModel(time, value + noise[0], model); - //updateTestDebug(time, value + noise[0], model); + debug.addValueAndPrediction(time, value + noise[0], model); time += bucketLength; value += slope; } @@ -2057,7 +2092,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { for (std::size_t i = 0u; i < 1500; ++i) { rng.generateNormalSamples(0.0, 2.0, 1, noise); updateModel(time, value + noise[0], model); - //updateTestDebug(time, value + noise[0], model); + debug.addValueAndPrediction(time, value + noise[0], model); time += bucketLength; value += slope; } @@ -2070,7 +2105,7 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { while (expected.size() < 2000 && value < 95.0) { rng.generateNormalSamples(0.0, 2.0, 1, noise); expected.push_back(value + noise[0]); - //actual << value + noise[0] << std::endl; + debug.addValue(value + noise[0]); value += slope; } value = 5.0; @@ -2078,26 +2113,17 @@ void CTimeSeriesModelTest::testStepChangeDiscontinuities() { // Test forecasting. - //std::ofstream file; - //file.open("forecast.m"); - //file << actual.str() << "];"; - //file << modelBounds.str() << "];"; - //file << "y = ["; TDouble3VecVec forecast; auto pushErrorBar = [&](const maths::SErrorBar& errorBar) { forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted, errorBar.s_UpperBound}); - //file << errorBar.s_LowerBound << "," - // << errorBar.s_Predicted << "," - // << errorBar.s_UpperBound << std::endl; + debug.addForecast(errorBar); }; std::string m; model.forecast(time, time + 2000 * bucketLength, 90.0, {-1000.0}, {1000.0}, pushErrorBar, m); - //file << "];"; - double outOfBounds{0.0}; for (std::size_t i = 0u; i < forecast.size(); ++i) { outOfBounds += static_cast(expected[i] < forecast[i][0] || @@ -2125,20 +2151,6 @@ void CTimeSeriesModelTest::testLinearScaling() { {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; - //std::ostringstream actual, modelBounds; - //actual << "r = ["; - //modelBounds << "x = ["; - //auto updateTestDebug = [&](core_t::TTime time, double value, - // const maths::CUnivariateTimeSeriesModel &model) - // { - // actual << value << std::endl; - // auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); - // if (x.size() == 3) - // { - // modelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl; - // } - // }; - test::CRandomNumbers rng; double noiseVariance{3.0}; @@ -2148,6 +2160,7 @@ void CTimeSeriesModelTest::testLinearScaling() { auto controllers = decayRateControllers(1); maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; + CChangeDebug debug; core_t::TTime time{0}; TDoubleVec samples; @@ -2155,7 +2168,7 @@ void CTimeSeriesModelTest::testLinearScaling() { for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } @@ -2165,14 +2178,14 @@ void CTimeSeriesModelTest::testLinearScaling() { for (auto sample : samples) { sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 1500, samples); for (auto sample : samples) { sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); auto x = model.confidenceInterval( time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(::fabs(sample - x[1][0]) < 1.2 * std::sqrt(noiseVariance)); @@ -2186,25 +2199,20 @@ void CTimeSeriesModelTest::testLinearScaling() { for (auto sample : samples) { sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 400, samples); for (auto sample : samples) { sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample; updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); auto x = model.confidenceInterval( time, 90.0, maths_t::CUnitWeights::unit(1)); CPPUNIT_ASSERT(std::fabs(sample - x[1][0]) < 3.1 * std::sqrt(noiseVariance)); CPPUNIT_ASSERT(std::fabs(x[2][0] - x[0][0]) < 3.3 * std::sqrt(noiseVariance)); time += bucketLength; } - - //std::ofstream file; - //file.open("bounds.m"); - //file << actual.str() << "];"; - //file << modelBounds.str() << "];"; } void CTimeSeriesModelTest::testDaylightSaving() { @@ -2217,20 +2225,6 @@ void CTimeSeriesModelTest::testDaylightSaving() { {core::make_triple(time, TDouble2Vec{value}, TAG)}); }; - //std::ostringstream actual, modelBounds; - //actual << "r = ["; - //modelBounds << "x = ["; - //auto updateTestDebug = [&](core_t::TTime time, double value, - // const maths::CUnivariateTimeSeriesModel &model) - // { - // actual << value << std::endl; - // auto x = model.confidenceInterval(time, 90.0, {maths_t::E_SampleCountWeight}, {{1.0}}); - // if (x.size() == 3) - // { - // modelBounds << x[0][0] << "," << x[1][0] << "," << x[2][0] << std::endl; - // } - // }; - test::CRandomNumbers rng; core_t::TTime hour{core::constants::HOUR}; @@ -2241,6 +2235,7 @@ void CTimeSeriesModelTest::testDaylightSaving() { auto controllers = decayRateControllers(1); maths::CUnivariateTimeSeriesModel model{modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; + CChangeDebug debug; core_t::TTime time{0}; TDoubleVec samples; @@ -2248,7 +2243,7 @@ void CTimeSeriesModelTest::testDaylightSaving() { for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } @@ -2258,14 +2253,14 @@ void CTimeSeriesModelTest::testDaylightSaving() { for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time + hour); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 1500, samples); for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time + hour); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); CPPUNIT_ASSERT_EQUAL(hour, model.trendModel().timeShift()); auto x = model.confidenceInterval( time, 90.0, maths_t::CUnitWeights::unit(1)); @@ -2280,14 +2275,14 @@ void CTimeSeriesModelTest::testDaylightSaving() { for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); time += bucketLength; } rng.generateNormalSamples(0.0, noiseVariance, 400, samples); for (auto sample : samples) { sample += 12.0 + 10.0 * smoothDaily(time); updateModel(time, sample, model); - //updateTestDebug(time, sample, model); + debug.addValueAndPrediction(time, sample, model); CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), model.trendModel().timeShift()); auto x = model.confidenceInterval( time, 90.0, maths_t::CUnitWeights::unit(1)); @@ -2295,11 +2290,6 @@ void CTimeSeriesModelTest::testDaylightSaving() { CPPUNIT_ASSERT(std::fabs(x[2][0] - x[0][0]) < 3.9 * std::sqrt(noiseVariance)); time += bucketLength; } - - //std::ofstream file; - //file.open("bounds.m"); - //file << actual.str() << "];"; - //file << modelBounds.str() << "];"; } CppUnit::Test* CTimeSeriesModelTest::suite() {