From 7a0767c5a0c3358ae90af16e3569dc940560f2aa Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Thu, 2 Nov 2023 22:59:59 +0000
Subject: [PATCH 1/6] [ML] Improve forecasting for time series with step changes (#2591)

For forecasting, we model the level of a time series in which we've observed
step discontinuities via a Markov process. Specifically, we estimate the
historical step size distribution and the distribution of the steps in time
and as a function of the time series value. For this second part we use an
online naive Bayes model to estimate the probability that at any given point
in a roll out for forecasting we will get a step.

This approach generally works well unless the roll out takes us into the
tails of the distribution of values we've observed historically for the time
series. In this case, our predicted probabilities are very sensitive to the
tail behaviour of the distributions we fit to the time series values where we
saw a step, and as a result we sometimes predict far too many steps.

We can detect this case: when we're in the tails of the time series value
distribution. This change does this and stops predicting changes in such
cases, which avoids these pathologies.

This fixes #2466.
---
 docs/CHANGELOG.asciidoc                       |   7 ++
 include/maths/common/CNaiveBayes.h            |  74 +++++++---
 lib/core/CStateRestoreTraverser.cc            |   3 +-
 lib/maths/common/CNaiveBayes.cc               | 111 +++++++++---------
 lib/maths/common/unittest/CLbfgsTest.cc       |   4 +-
 lib/maths/common/unittest/CNaiveBayesTest.cc  |  76 +++++++++---
 lib/maths/time_series/CTrendComponent.cc      |  43 ++++++-
 .../unittest/CTrendComponentTest.cc           |  83 +++++++++++--
 lib/model/unittest/CMetricDataGathererTest.cc |   5 +-
 .../CMetricPopulationDataGathererTest.cc      |   7 +-
 lib/model/unittest/CResourceMonitorTest.cc    |   3 -
 11 files changed, 309 insertions(+), 107 deletions(-)

diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 6162fee0bb..a7c3d4c8e5 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -28,6 +28,13 @@
 
 //=== Regressions
 
+== {es} version 7.17.15
+
+=== Enhancements
+
+* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
+  issue: {ml-issue}2466[#2466].)
+
 == {es} version 7.17.13
 
 === Enhancements

diff --git a/include/maths/common/CNaiveBayes.h b/include/maths/common/CNaiveBayes.h
index d9948766e7..6cf60aec37 100644
--- a/include/maths/common/CNaiveBayes.h
+++ b/include/maths/common/CNaiveBayes.h
@@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
     TPriorPtr m_Prior;
 };
 
+//! \brief Enables using custom feature weights in class prediction.
+class CNaiveBayesFeatureWeight {
+public:
+    virtual ~CNaiveBayesFeatureWeight() = default;
+    virtual void add(std::size_t class_, double logLikelihood) = 0;
+    virtual double calculate() const = 0;
+};
+
 //! \brief Implements a Naive Bayes classifier.
 class MATHS_COMMON_EXPORT CNaiveBayes {
 public:
+    using TDoubleDoublePr = std::pair<double, double>;
     using TDoubleSizePr = std::pair<double, std::size_t>;
     using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
+    using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
     using TDouble1Vec = core::CSmallVector<double, 1>;
     using TDouble1VecVec = std::vector<TDouble1Vec>;
-    using TOptionalDouble = boost::optional<double>;
+    using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;
+
+private:
+    //! \brief All features have unit weight in class prediction.
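+    //! Returning a weight of 1.0 for every feature means each feature
+    //! contributes its full log-likelihood to class selection, which
+    //! recovers the standard naive Bayes class posterior.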
+    class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
+    public:
+        void add(std::size_t, double) override {}
+        double calculate() const override { return 1.0; }
+    };
+
+    class CUnitFeatureWeightProvider {
+    public:
+        CUnitFeatureWeight& operator()() const { return m_UnitWeight; }
+
+    private:
+        mutable CUnitFeatureWeight m_UnitWeight;
+    };
 
 public:
-    explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
-                         double decayRate = 0.0,
-                         TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
+    explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
 
     CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
                 const SDistributionRestoreParams& params,
                 core::CStateRestoreTraverser& traverser);
 
@@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
     //! Check if any training data has been added, i.e. this is initialized.
     bool initialized() const;
 
+    //! Get the number of classes.
+    std::size_t numberClasses() const;
+
     //! This can be used to optionally seed the class counts
     //! with \p counts. These are added on to data class counts
     //! to compute the class posterior probabilities.
@@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
     //!
     //! \param[in] n The number of class probabilities to estimate.
     //! \param[in] x The feature values.
+    //! \param[in] weightProvider Computes a feature weight from the class
+    //! conditional log-likelihood of the feature value. It should be in
+    //! the range [0,1]. The smaller the value the less impact the feature
+    //! has on class selection.
+    //! \return The class probabilities and the minimum feature weight.
     //! \note \p x size should be equal to the number of features.
     //! A missing feature is indicated by passing an empty vector
     //! for that feature.
-    TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
+    TDoubleSizePrVecDoublePr highestClassProbabilities(
+        std::size_t n,
+        const TDouble1VecVec& x,
+        const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
 
     //! Get the probability of the class labeled \p label for \p x.
     //!
     //! \param[in] label The label of the class of interest.
     //! \param[in] x The feature values.
+    //! \param[in] weightProvider Computes a feature weight from the class
+    //! conditional log-likelihood of the feature value. It should be in
+    //! the range [0,1]. The smaller the value the less impact the feature
+    //! has on class selection.
+    //! \return The probability of the class labeled \p label and the
+    //! minimum feature weight.
     //! \note \p x size should be equal to the number of features.
     //! A missing feature is indicated by passing an empty vector
     //! for that feature.
-    double classProbability(std::size_t label, const TDouble1VecVec& x) const;
+    TDoubleDoublePr classProbability(std::size_t label,
+                                     const TDouble1VecVec& x,
+                                     const TFeatureWeightProvider& weightProvider =
+                                         CUnitFeatureWeightProvider{}) const;
 
     //! Get the probabilities of all the classes for \p x.
     //!
     //! \param[in] x The feature values.
+    //! \param[in] weightProvider Computes a feature weight from the class
+    //! conditional log-likelihood of the feature value. It should be in
+    //! the range [0,1]. The smaller the value the less impact the feature
+    //! has on class selection.
+    //! \return The class probabilities and the minimum feature weight.
     //! \note \p x size should be equal to the number of features.
     //! A missing feature is indicated by passing an empty vector
     //! for that feature.
-    TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
+    TDoubleSizePrVecDoublePr
+    classProbabilities(const TDouble1VecVec& x,
+                       const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
 
     //! Debug the memory used by this object.
     void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
@@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
     bool validate(const TDouble1VecVec& x) const;
 
 private:
-    //! It is not always appropriate to use features with very low
-    //! probability in all classes to discriminate: the class choice
-    //! will be very sensitive to the underlying conditional density
-    //! model. This is a cutoff (for the minimum maximum class log
-    //! likelihood) in order to use a feature.
-    TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;
-
     //! Controls the rate at which data are aged out.
     double m_DecayRate;
 
diff --git a/lib/core/CStateRestoreTraverser.cc b/lib/core/CStateRestoreTraverser.cc
index bad15cea60..a78ed3261f 100644
--- a/lib/core/CStateRestoreTraverser.cc
+++ b/lib/core/CStateRestoreTraverser.cc
@@ -18,8 +18,7 @@ namespace core {
 CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
 }
 
-CStateRestoreTraverser::~CStateRestoreTraverser() {
-}
+CStateRestoreTraverser::~CStateRestoreTraverser() = default;
 
 bool CStateRestoreTraverser::haveBadState() const {
     return m_BadState;
diff --git a/lib/maths/common/CNaiveBayes.cc b/lib/maths/common/CNaiveBayes.cc
index 3a0792121a..54b7a1b126 100644
--- a/lib/maths/common/CNaiveBayes.cc
+++ b/lib/maths/common/CNaiveBayes.cc
@@ -36,8 +36,6 @@ namespace {
 const core::TPersistenceTag PRIOR_TAG{"a", "prior"};
 const core::TPersistenceTag CLASS_LABEL_TAG{"b", "class_label"};
 const core::TPersistenceTag CLASS_MODEL_TAG{"c", "class_model"};
-const core::TPersistenceTag MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{
-    "d", "min_max_likelihood_to_use_feature"};
 const core::TPersistenceTag COUNT_TAG{"e", "count"};
 const core::TPersistenceTag CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f", "conditional_density_from_prior"};
 }
@@ -135,24 +133,26 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const {
     return result;
 }
 
-CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
-                         double decayRate,
-                         TOptionalDouble minMaxLogLikelihoodToUseFeature)
-    : m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature},
-      m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
+CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate)
+    : m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
 }
 
 CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
                          const SDistributionRestoreParams& params,
                          core::CStateRestoreTraverser& traverser)
     : m_DecayRate{params.s_DecayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
-    traverser.traverseSubLevel(std::bind(&CNaiveBayes::acceptRestoreTraverser, this,
-                                         std::cref(params), std::placeholders::_1));
+    // If we persist before we create class conditional distributions we will
+    // not have anything to restore and hasSubLevel will be false. Trying to
+    // restore sets the traverser state to bad so we need to handle this explicitly.
+ if (traverser.hasSubLevel() && traverser.traverseSubLevel([&](auto& traverser_) { + return this->acceptRestoreTraverser(params, traverser_); + }) == false) { + traverser.setBadState(); + } } CNaiveBayes::CNaiveBayes(const CNaiveBayes& other) - : m_MinMaxLogLikelihoodToUseFeature{other.m_MinMaxLogLikelihoodToUseFeature}, - m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} { + : m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} { for (const auto& class_ : other.m_ClassConditionalDensities) { m_ClassConditionalDensities.emplace(class_.first, class_.second); } @@ -170,9 +170,6 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& param std::ref(class_), std::cref(params), std::placeholders::_1)), m_ClassConditionalDensities.emplace(label, std::move(class_))) - RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value, - core::CStringUtils::stringToType(traverser.value(), value), - m_MinMaxLogLikelihoodToUseFeature.reset(value)) } while (traverser.next()); return true; } @@ -195,12 +192,6 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c std::ref(class_->second), std::placeholders::_1)); } - - if (m_MinMaxLogLikelihoodToUseFeature) { - inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, - *m_MinMaxLogLikelihoodToUseFeature, - core::CIEEE754::E_SinglePrecision); - } } CNaiveBayes& CNaiveBayes::operator=(const CNaiveBayes& other) { @@ -215,11 +206,10 @@ void CNaiveBayes::swap(CNaiveBayes& other) { std::swap(m_DecayRate, other.m_DecayRate); m_Exemplar.swap(other.m_Exemplar); m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities); - std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature); } bool CNaiveBayes::initialized() const { - return m_ClassConditionalDensities.size() > 0 && + return m_ClassConditionalDensities.empty() == false && std::all_of(m_ClassConditionalDensities.begin(), m_ClassConditionalDensities.end(), [](const std::pair& class_) { @@ -227,6 +217,10 @@ bool CNaiveBayes::initialized() const { }); } +std::size_t CNaiveBayes::numberClasses() const { + return m_ClassConditionalDensities.size(); +} + void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) { for (const auto& count : counts) { m_ClassConditionalDensities.emplace(count.second, CClass{count.first}); @@ -234,7 +228,7 @@ void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) { } void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) { - if (!this->validate(x)) { + if (this->validate(x) == false) { return; } @@ -249,7 +243,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& bool updateCount{false}; for (std::size_t i = 0; i < x.size(); ++i) { - if (x[i].size() > 0) { + if (x[i].empty() == false) { class_.conditionalDensities()[i]->add(x[i]); updateCount = true; } @@ -280,62 +274,74 @@ void CNaiveBayes::propagateForwardsByTime(double time) { } } -CNaiveBayes::TDoubleSizePrVec -CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const { - TDoubleSizePrVec p(this->classProbabilities(x)); +CNaiveBayes::TDoubleSizePrVecDoublePr +CNaiveBayes::highestClassProbabilities(std::size_t n, + const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider) const { + auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider); n = std::min(n, p.size()); - std::sort(p.begin(), p.begin() + n, std::greater()); - return 
TDoubleSizePrVec{p.begin(), p.begin() + n};
+    std::sort(p.begin(), p.begin() + n, std::greater<>());
+    return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight};
 }
 
-double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const {
-    TDoubleSizePrVec p(this->classProbabilities(x));
+CNaiveBayes::TDoubleDoublePr
+CNaiveBayes::classProbability(std::size_t label,
+                              const TDouble1VecVec& x,
+                              const TFeatureWeightProvider& weightProvider) const {
+    auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
     auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) {
         return p_.second == label;
     });
-    return i == p.end() ? 0.0 : i->first;
+    return {i == p.end() ? 0.0 : i->first, minFeatureWeight};
 }
 
-CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const {
-    if (!this->validate(x)) {
-        return {};
+CNaiveBayes::TDoubleSizePrVecDoublePr
+CNaiveBayes::classProbabilities(const TDouble1VecVec& x,
+                                const TFeatureWeightProvider& weightProvider) const {
+    if (this->validate(x) == false) {
+        return {{}, 0.0};
     }
     if (m_ClassConditionalDensities.empty()) {
         LOG_ERROR(<< "Trying to compute class probabilities without supplying training data");
-        return {};
+        return {{}, 0.0};
     }
 
     using TDoubleVec = std::vector<double>;
-    using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
 
     TDoubleSizePrVec p;
     p.reserve(m_ClassConditionalDensities.size());
     for (const auto& class_ : m_ClassConditionalDensities) {
         p.emplace_back(CTools::fastLog(class_.second.count()), class_.first);
     }
 
+    double minFeatureWeight{1.0};
     TDoubleVec logLikelihoods;
     for (std::size_t i = 0; i < x.size(); ++i) {
-        if (x[i].size() > 0) {
-            TMaxAccumulator maxLogLikelihood;
+        if (x[i].empty() == false) {
+            auto& featureWeight = weightProvider();
             logLikelihoods.clear();
             for (const auto& class_ : m_ClassConditionalDensities) {
                 const auto& density = class_.second.conditionalDensities()[i];
                 double logLikelihood{density->logValue(x[i])};
                 double logMaximumLikelihood{density->logMaximumValue()};
-                maxLogLikelihood.add(logLikelihood - logMaximumLikelihood);
                 logLikelihoods.push_back(logLikelihood);
+                featureWeight.add(class_.first, logLikelihood - logMaximumLikelihood);
             }
-            double weight{1.0};
-            if (m_MinMaxLogLikelihoodToUseFeature) {
-                weight = CTools::logisticFunction(
-                    (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) /
-                        std::fabs(*m_MinMaxLogLikelihoodToUseFeature),
-                    0.1);
-            }
+
+            // We compute the class c_i probability using
+            //
+            //   p(c_i | x) = exp(sum_j{w_j * log(L(x_j | c_i))}) / Z * p(c_i).
+            //
+            // Any feature whose weight < 1 has its significance dropped in class
+            // selection, effectively we use the w_i'th root of the log-likelihood
+            // which tends to 1 for all values if w_i is small enough. This can be
+            // used to ignore features that for which x is the extreme tails of the
+            // class conditional distribution.
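+            //
+            // For example (illustrative numbers): with two classes, equal priors
+            // and a single feature for which log(L(x | c_1)) = -10 and
+            // log(L(x | c_2)) = -12, a unit weight gives odds e^2 ~ 7.4 in
+            // favour of c_1, whereas a weight of 0.1 gives odds e^0.2 ~ 1.2,
+            // so the posterior tends to the class priors as the weight tends
+            // to zero.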
+ double featureWeight_{featureWeight.calculate()}; for (std::size_t j = 0; j < logLikelihoods.size(); ++j) { - p[j].first += weight * logLikelihoods[j]; + p[j].first += featureWeight_ * logLikelihoods[j]; } + minFeatureWeight = std::min(minFeatureWeight, featureWeight_); } } @@ -349,7 +355,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV pc.first /= Z; } - return p; + return {std::move(p), minFeatureWeight}; } void CNaiveBayes::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { @@ -363,8 +369,7 @@ std::size_t CNaiveBayes::memoryUsage() const { core::CMemory::dynamicSize(m_ClassConditionalDensities); } -uint64_t CNaiveBayes::checksum(uint64_t seed) const { - CChecksum::calculate(seed, m_MinMaxLogLikelihoodToUseFeature); +std::uint64_t CNaiveBayes::checksum(std::uint64_t seed) const { CChecksum::calculate(seed, m_DecayRate); CChecksum::calculate(seed, m_Exemplar); return CChecksum::calculate(seed, m_ClassConditionalDensities); @@ -386,7 +391,7 @@ std::string CNaiveBayes::print() const { bool CNaiveBayes::validate(const TDouble1VecVec& x) const { auto class_ = m_ClassConditionalDensities.begin(); if (class_ != m_ClassConditionalDensities.end() && - class_->second.conditionalDensities().size() > 0 && + class_->second.conditionalDensities().empty() == false && class_->second.conditionalDensities().size() != x.size()) { LOG_ERROR(<< "Unexpected feature vector: " << core::CContainerPrinter::print(x)); return false; @@ -423,7 +428,7 @@ bool CNaiveBayes::CClass::acceptRestoreTraverser(const SDistributionRestoreParam void CNaiveBayes::CClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision); for (const auto& density : m_ConditionalDensities) { - if (dynamic_cast(density.get())) { + if (dynamic_cast(density.get()) != nullptr) { inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, std::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter, density.get(), std::placeholders::_1)); diff --git a/lib/maths/common/unittest/CLbfgsTest.cc b/lib/maths/common/unittest/CLbfgsTest.cc index f7de9827e2..38076718c4 100644 --- a/lib/maths/common/unittest/CLbfgsTest.cc +++ b/lib/maths/common/unittest/CLbfgsTest.cc @@ -228,12 +228,12 @@ BOOST_AUTO_TEST_CASE(testConstrainedMinimize) { std::tie(x, fx) = lbfgs.constrainedMinimize(f, g, a, b, x0, 0.2); BOOST_REQUIRE_EQUAL(fx, static_cast(f(x))); - BOOST_REQUIRE_CLOSE_ABSOLUTE(static_cast(f(xmin)), fx, 1e-3); + BOOST_REQUIRE_CLOSE_ABSOLUTE(static_cast(f(xmin)), fx, 5e-3); ferror += std::fabs(fx - f(xmin)) / 100.0; } - BOOST_REQUIRE_CLOSE_ABSOLUTE(0.0, ferror, 1e-5); + BOOST_REQUIRE_CLOSE_ABSOLUTE(0.0, ferror, 5e-5); } BOOST_AUTO_TEST_CASE(testMinimizeWithVerySmallGradient) { diff --git a/lib/maths/common/unittest/CNaiveBayesTest.cc b/lib/maths/common/unittest/CNaiveBayesTest.cc index 8d691ea198..e76a3fec7c 100644 --- a/lib/maths/common/unittest/CNaiveBayesTest.cc +++ b/lib/maths/common/unittest/CNaiveBayesTest.cc @@ -28,11 +28,13 @@ #include #include +namespace { BOOST_AUTO_TEST_SUITE(CNaiveBayesTest) using namespace ml; using TDoubleVec = std::vector; +using TDoubleVecVec = std::vector; using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; using TDoubleSizePr = std::pair; @@ -40,6 +42,12 @@ using TDoubleSizePrVec = std::vector; using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = 
maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator; +class CTestFeatureWeight : public maths::common::CNaiveBayesFeatureWeight { +public: + void add(std::size_t, double) override {} + double calculate() const override { return 1e-3; } +}; + BOOST_AUTO_TEST_CASE(testClassification) { // We'll test classification using Gaussian naive Bayes. We // test: @@ -55,7 +63,7 @@ BOOST_AUTO_TEST_CASE(testClassification) { test::CRandomNumbers rng; - TDoubleVec trainingData[4]; + TDoubleVecVec trainingData(4); rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]); @@ -91,7 +99,7 @@ BOOST_AUTO_TEST_CASE(testClassification) { // - P(1) = (initialCount + 100) / (2*initialCount + 300) // - P(2) = (initialCount + 200) / (2*initialCount + 300) - TDoubleSizePrVec probabilities(nb.highestClassProbabilities(2, {{}, {}})); + auto[probabilities, confidence](nb.highestClassProbabilities(2, {{}, {}})); double P1{(initialCount + 100.0) / (2.0 * initialCount + 300.0)}; double P2{(initialCount + 200.0) / (2.0 * initialCount + 300.0)}; @@ -156,19 +164,22 @@ BOOST_AUTO_TEST_CASE(testClassification) { maths::common::CTools::safePdf(class1[1], xtest[i + 1])}; double p2{P2 * maths::common::CTools::safePdf(class2[0], xtest[i]) * maths::common::CTools::safePdf(class2[1], xtest[i + 1])}; - probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i + 1]}}); + std::tie(probabilities, confidence) = + nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[0]); // Miss out the first feature value. p1 = P1 * maths::common::CTools::safePdf(class1[1], xtest[i + 1]); p2 = P2 * maths::common::CTools::safePdf(class2[1], xtest[i + 1]); - probabilities = nb.highestClassProbabilities(2, {{}, {xtest[i + 1]}}); + std::tie(probabilities, confidence) = + nb.highestClassProbabilities(2, {{}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[1]); // Miss out the second feature value. 
            p1 = P1 * maths::common::CTools::safePdf(class1[0], xtest[i]);
             p2 = P2 * maths::common::CTools::safePdf(class2[0], xtest[i]);
-            probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {}});
+            std::tie(probabilities, confidence) =
+                nb.highestClassProbabilities(2, {{xtest[i]}, {}});
             test(p1, p2, probabilities, meanErrors[2]);
         }
 
@@ -193,7 +204,7 @@ BOOST_AUTO_TEST_CASE(testUninitialized) {
     maths::common::CNaiveBayes nb{maths::common::CNaiveBayes{
         maths::common::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}};
 
-    TDoubleVec trainingData[2];
+    TDoubleVecVec trainingData(2);
 
     for (std::size_t i = 0; i < 2; ++i) {
         BOOST_REQUIRE_EQUAL(false, nb.initialized());
@@ -224,7 +235,7 @@ BOOST_AUTO_TEST_CASE(testPropagationByTime) {
         maths::common::CNaiveBayes{
             maths::common::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}};
 
-    TDoubleVec trainingData[4];
+    TDoubleVecVec trainingData(4);
     for (std::size_t i = 0; i < 1000; ++i) {
         double x{static_cast<double>(i)};
         rng.generateNormalSamples(0.02 * x - 14.0, 16.0, 1, trainingData[0]);
@@ -247,32 +258,68 @@ BOOST_AUTO_TEST_CASE(testPropagationByTime) {
     {
         TDoubleSizePrVec probabilities[]{
-            nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}),
-            nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})};
+            nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}).first,
+            nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}}).first};
         LOG_DEBUG(<< "Aged class probabilities = "
                   << core::CContainerPrinter::print(probabilities[0]));
         LOG_DEBUG(<< "Class probabilities = "
                   << core::CContainerPrinter::print(probabilities[1]));
-        BOOST_REQUIRE_EQUAL(std::size_t(2), probabilities[0][0].second);
+        BOOST_REQUIRE_EQUAL(2, probabilities[0][0].second);
         BOOST_TEST_REQUIRE(probabilities[0][0].first > 0.99);
         BOOST_REQUIRE_EQUAL(std::size_t(1), probabilities[1][0].second);
         BOOST_TEST_REQUIRE(probabilities[1][0].first > 0.95);
     }
     {
         TDoubleSizePrVec probabilities[]{
-            nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}),
-            nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})};
+            nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}).first,
+            nb[1].highestClassProbabilities(2, {{10.0}, {10.0}}).first};
         LOG_DEBUG(<< "Aged class probabilities = "
                   << core::CContainerPrinter::print(probabilities[0]));
         LOG_DEBUG(<< "Class probabilities = "
                   << core::CContainerPrinter::print(probabilities[1]));
-        BOOST_REQUIRE_EQUAL(std::size_t(1), probabilities[0][0].second);
+        BOOST_REQUIRE_EQUAL(1, probabilities[0][0].second);
         BOOST_TEST_REQUIRE(probabilities[0][0].first > 0.99);
         BOOST_REQUIRE_EQUAL(std::size_t(2), probabilities[1][0].second);
         BOOST_TEST_REQUIRE(probabilities[1][0].first > 0.95);
     }
 }
 
+BOOST_AUTO_TEST_CASE(testExtrapolation) {
+    // Test that:
+    //  1. Applying low feature weights means the conditional probabilities
+    //     of all classes tend to 1 / "number of classes",
+    //  2. The number returned as the confidence in the probabilities
+    //     is the minimum feature weight.
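+    //
+    // With a constant feature weight of 1e-3 the weighted log-likelihoods
+    // are nearly equal for both classes, so the posterior collapses to the
+    // class priors, which are 0.5 each with 100 training points per class.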
+ + test::CRandomNumbers rng; + + TDoubleVecVec trainingData(2); + rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); + rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); + + maths::common::CNormalMeanPrecConjugate normal{ + maths::common::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; + maths::common::CNaiveBayes nb{maths::common::CNaiveBayesFeatureDensityFromPrior(normal)}; + + for (auto x : trainingData[0]) { + nb.addTrainingDataPoint(0, {{x}}); + } + for (auto x : trainingData[1]) { + nb.addTrainingDataPoint(1, {{x}}); + } + + auto weightProvider = [weight = CTestFeatureWeight()]() mutable->maths::common::CNaiveBayesFeatureWeight& { + return weight; + }; + auto[probabilities, confidence] = nb.classProbabilities({{30.0}}, weightProvider); + LOG_DEBUG(<< "p = " << probabilities << ", confidence = " << confidence); + + BOOST_REQUIRE_EQUAL(2, probabilities.size()); + BOOST_REQUIRE_CLOSE_ABSOLUTE(0.5, probabilities[0].first, 1e-2); + BOOST_REQUIRE_CLOSE_ABSOLUTE(0.5, probabilities[1].first, 1e-2); + BOOST_REQUIRE_EQUAL(1e-3, confidence); +} + BOOST_AUTO_TEST_CASE(testMemoryUsage) { // Check invariants. @@ -316,7 +363,7 @@ BOOST_AUTO_TEST_CASE(testMemoryUsage) { BOOST_AUTO_TEST_CASE(testPersist) { test::CRandomNumbers rng; - TDoubleVec trainingData[4]; + TDoubleVecVec trainingData(4); rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]); @@ -367,3 +414,4 @@ BOOST_AUTO_TEST_CASE(testPersist) { } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/maths/time_series/CTrendComponent.cc b/lib/maths/time_series/CTrendComponent.cc index e46160e461..1258875cf6 100644 --- a/lib/maths/time_series/CTrendComponent.cc +++ b/lib/maths/time_series/CTrendComponent.cc @@ -54,6 +54,27 @@ const core_t::TTime UNSET_TIME{0}; const std::size_t NO_CHANGE_LABEL{0}; const std::size_t LEVEL_CHANGE_LABEL{1}; +class CChangeForecastFeatureWeight : public common::CNaiveBayesFeatureWeight { +public: + void add(std::size_t class_, double logLikelihood) override { + if (class_ == NO_CHANGE_LABEL) { + m_LogLikelihood = logLikelihood; + } + } + + double calculate() const override { + // Downweight features for which we don't have sufficient examples + // of the time series not changing. + // Note that m_LogLikelihood = 0.5 * (x - m)^2 / sigma^2 so 4.5 + // corresponds to the case the feature value is at the 3 sigma + // point of the conditional distribution. + return common::CTools::logisticFunction((4.5 + m_LogLikelihood) / 4.5, 0.1); + } + +private: + double m_LogLikelihood{0.0}; +}; + //! Get the desired weight for the regression model. 
double modelWeight(double targetDecayRate, double modelDecayRate) { return targetDecayRate == modelDecayRate @@ -92,7 +113,7 @@ common::CNaiveBayesFeatureDensityFromPrior naiveBayesExemplar(double decayRate) common::CNaiveBayes initialProbabilityOfChangeModel(double decayRate) { return common::CNaiveBayes{naiveBayesExemplar(decayRate), - TIME_SCALES[NUMBER_MODELS - 1] * decayRate, -20.0}; + TIME_SCALES[NUMBER_MODELS - 1] * decayRate}; } common::CNormalMeanPrecConjugate initialMagnitudeOfChangeModel(double decayRate) { @@ -296,11 +317,11 @@ void CTrendComponent::shiftLevel(double shift, m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); } + m_TimeOfLastLevelChange = time; for (std::size_t i = segments[last]; i < values.size(); ++i, time += bucketLength) { this->dontShiftLevel(time, common::CBasicStatistics::mean(values[i])); } m_MagnitudeOfLevelChangeModel.addSamples({magnitude}, maths_t::CUnitWeights::SINGLE_UNIT); - m_TimeOfLastLevelChange = time; } void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) { @@ -789,14 +810,26 @@ CTrendComponent::TDouble3Vec CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) { TDouble3Vec result{0.0, 0.0, 0.0}; - if (m_Probability.initialized()) { + if (m_Probability.initialized() && m_Probability.numberClasses() > 1) { common::CSampling::uniformSample(0.0, 1.0, m_Levels.size(), m_Uniform01); bool reorder{false}; + auto weightProvider = [weight = + CChangeForecastFeatureWeight{}]() mutable->common::CNaiveBayesFeatureWeight& { + weight = CChangeForecastFeatureWeight{}; + return weight; + }; for (std::size_t i = 0; i < m_Levels.size(); ++i) { double dt{static_cast(time - m_TimesOfLastChange[i])}; double x{m_Levels[i] + prediction}; - double p{m_Probability.classProbability(LEVEL_CHANGE_LABEL, {{dt}, {x}})}; - m_ProbabilitiesOfChange[i] = std::max(m_ProbabilitiesOfChange[i], p); + auto[p, pConfidence] = m_Probability.classProbability( + LEVEL_CHANGE_LABEL, {{dt}, {x}}, weightProvider); + // Here we decide whether to increase the probability we should have + // seen a step change for this rollout. If we are no longer confident + // in our predicted probability we do not predict changes based on + // the principle of least surprise. 
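+            // With the weight defined in CChangeForecastFeatureWeight a
+            // confidence of 0.5 corresponds to a feature value at the 3 sigma
+            // point of the "no change" conditional distribution: requiring
+            // pConfidence > 0.5 therefore means every feature is within
+            // roughly 3 sigma of the values we saw historically, i.e. we
+            // interpolate rather than extrapolate the feature models.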
+ if (pConfidence > 0.5) { + m_ProbabilitiesOfChange[i] = std::max(m_ProbabilitiesOfChange[i], p); + } if (m_Uniform01[i] < m_ProbabilitiesOfChange[i]) { double stepMean{m_Magnitude.marginalLikelihoodMean()}; double stepVariance{m_Magnitude.marginalLikelihoodVariance()}; diff --git a/lib/maths/time_series/unittest/CTrendComponentTest.cc b/lib/maths/time_series/unittest/CTrendComponentTest.cc index af9f91bf26..43bb0879f0 100644 --- a/lib/maths/time_series/unittest/CTrendComponentTest.cc +++ b/lib/maths/time_series/unittest/CTrendComponentTest.cc @@ -27,16 +27,16 @@ #include #include +#include #include #include #include +namespace { BOOST_AUTO_TEST_SUITE(CTrendComponentTest) using namespace ml; -namespace { - using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; using TDouble1Vec = core::CSmallVector; @@ -225,7 +225,6 @@ auto forecastErrors(ITR actual, return std::make_pair(maths::common::CBasicStatistics::mean(meanError), maths::common::CBasicStatistics::mean(meanErrorAt95)); } -} BOOST_AUTO_TEST_CASE(testValueAndVariance) { // Check that the prediction bias is small in the long run @@ -362,6 +361,71 @@ BOOST_AUTO_TEST_CASE(testForecast) { BOOST_TEST_REQUIRE(errorAt95 < 0.001); } +BOOST_AUTO_TEST_CASE(testStepChangeForecasting) { + // A randomized test that forecasts of time series with step changes + // don't explode. We previously sometimes ran into issues when we + // extrapolated the feature distributions we use to predict steps. + // In such cases we would predict far too many steps leading to + // overly wide forecast bounds and unrealistic predictions. + + using TSizeVec = std::vector; + + test::CRandomNumbers rng; + double interval{20.0}; + + maths::time_series::CTrendComponent::TFloatMeanAccumulatorVec values; + + for (std::size_t t = 0; t < 100; ++t) { + TSizeVec changePoints; + rng.generateUniformSamples(0, 1000, 6, changePoints); + std::sort(changePoints.begin(), changePoints.end()); + changePoints.push_back(1000); + TDoubleVec levels; + rng.generateUniformSamples(-0.5 * interval, 0.5 * interval, 7, levels); + + maths::time_series::CTrendComponent trendModel{0.012}; + + TDoubleVec noise; + auto level = levels.begin(); + auto changePoint = changePoints.begin(); + core_t::TTime time{1672531200}; + for (std::size_t i = 0; i < 1000; ++i, time += BUCKET_LENGTH) { + rng.generateNormalSamples(0.0, 0.25, 1, noise); + double value{*level + noise[0]}; + trendModel.add(time, value); + values.emplace_back().add(value); + if (i == *changePoint) { + ++level; + ++changePoint; + double shift{*level - *(level - 1)}; + core_t::TTime valuesStartTime{ + time - static_cast(values.size()) * BUCKET_LENGTH}; + TSizeVec segments{0, *changePoint - *(changePoint - 1) - 1, + *changePoint - *(changePoint - 1)}; + TDoubleVec shifts{0.0, *level - *(level - 1)}; + trendModel.shiftLevel(shift, valuesStartTime, BUCKET_LENGTH, + values, segments, shifts); + values.clear(); + } else { + trendModel.dontShiftLevel(time, value); + } + } + + TDouble3VecVec forecast; + trendModel.forecast(time, time + 200 * BUCKET_LENGTH, BUCKET_LENGTH, 90.0, false, + [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, + [&forecast](core_t::TTime, const TDouble3Vec& value) { + forecast.push_back(value); + }); + + // Check that the prediction is in the switching interval and + // the forecast confidence interval isn't too wide. 
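+        // Each forecast entry is {lower bound, prediction, upper bound}. The
+        // levels are sampled from [-interval / 2, interval / 2], so a forecast
+        // which doesn't explode should keep the final prediction within
+        // +/-0.75 * interval and the width of the 90% confidence interval
+        // bounded by a small multiple of the switching range.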
+ BOOST_TEST_REQUIRE(forecast.back()[1] > -0.75 * interval); + BOOST_TEST_REQUIRE(forecast.back()[1] < 0.75 * interval); + BOOST_TEST_REQUIRE(forecast.back()[2] - forecast.back()[0] < 3.5 * interval); + } +} + BOOST_AUTO_TEST_CASE(testPersist) { // Check that serialization is idempotent. @@ -395,9 +459,9 @@ BOOST_AUTO_TEST_CASE(testPersist) { maths::common::SDistributionRestoreParams params{maths_t::E_ContinuousData, 0.1}; maths::time_series::CTrendComponent restoredComponent{0.1}; - traverser.traverseSubLevel( - std::bind(&maths::time_series::CTrendComponent::acceptRestoreTraverser, - &restoredComponent, std::cref(params), std::placeholders::_1)); + traverser.traverseSubLevel([&](auto& traverser_) { + return restoredComponent.acceptRestoreTraverser(params, traverser_); + }); BOOST_REQUIRE_EQUAL(origComponent.checksum(), restoredComponent.checksum()); @@ -442,9 +506,9 @@ BOOST_AUTO_TEST_CASE(testUpgradeTo7p1) { core::CRapidXmlParser parser; BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(xml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - traverser.traverseSubLevel( - std::bind(&maths::time_series::CTrendComponent::acceptRestoreTraverser, - &component, std::cref(params), std::placeholders::_1)); + traverser.traverseSubLevel([&](auto& traverser_) { + return component.acceptRestoreTraverser(params, traverser_); + }); test::CRandomNumbers rng; @@ -457,3 +521,4 @@ BOOST_AUTO_TEST_CASE(testUpgradeTo7p1) { } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 845612fea6..ff9e3ba616 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -1469,13 +1469,13 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { TStrVec influencerNames(std::begin(influencerNames_), std::end(influencerNames_)); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, KEY, features, startTime, 2u); + influencerNames, KEY, features, startTime, 2); addPerson("p1", gatherer, m_ResourceMonitor, influencerNames.size()); addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u, b = 0; i < boost::size(data); ++i) { + for (std::size_t i = 0; i < std::size(data); ++i) { if (data[i].get<0>() >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1514,7 +1514,6 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { } bucketStart += bucketLength; - ++b; } for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { addArrival(gatherer, m_ResourceMonitor, data[i].get<0>(), diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index e8f82d5c71..a175d9ee8a 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -977,10 +977,14 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { TStrVec influencerNames(std::begin(influencerNames_), std::end(influencerNames_)); CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, searchKey, features, startTime, 2u); + influencerNames, searchKey, features, startTime, 2); core_t::TTime 
bucketStart = startTime; +<<<<<<< HEAD for (std::size_t i = 0u, b = 0; i < boost::size(data); ++i) { +======= + for (std::size_t i = 0; i < std::size(data); ++i) { +>>>>>>> 5441e736 ([ML] Improve forecasting for time series with step changes (#2591)) if (data[i].s_Time >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; @@ -1019,7 +1023,6 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { } bucketStart += bucketLength; - ++b; } addArrival(data[i], gatherer, m_ResourceMonitor); } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index e6fa4a14ef..ad04250285 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -54,8 +54,6 @@ class CTestFixture { CHierarchicalResults results; std::string pervasive("IShouldNotBeRemoved"); - std::size_t numBuckets = 0; - for (core_t::TTime time = firstTime; time < static_cast(firstTime + bucketLength * buckets); time += (bucketLength / std::max(std::size_t(1), newPeoplePerBucket))) { @@ -64,7 +62,6 @@ class CTestFixture { detector.buildResults(bucketStart, bucketStart + bucketLength, results); monitor.pruneIfRequired(bucketStart); monitor.updateMoments(monitor.totalMemory(), bucketStart, bucketLength); - ++numBuckets; newBucket = true; } From 06c910d261d537e319f2e432d032f83738978add Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 10 Nov 2023 13:44:50 +0000 Subject: [PATCH 2/6] Can't use structured bindings on 7.17 --- lib/maths/common/CNaiveBayes.cc | 8 ++++++-- lib/maths/common/unittest/CNaiveBayesTest.cc | 8 ++++++-- lib/maths/time_series/CTrendComponent.cc | 4 +++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/maths/common/CNaiveBayes.cc b/lib/maths/common/CNaiveBayes.cc index 54b7a1b126..b78712d393 100644 --- a/lib/maths/common/CNaiveBayes.cc +++ b/lib/maths/common/CNaiveBayes.cc @@ -278,7 +278,9 @@ CNaiveBayes::TDoubleSizePrVecDoublePr CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x, const TFeatureWeightProvider& weightProvider) const { - auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider); + TDoubleSizePrVec p; + double minFeatureWeight; + std::tie(p, minFeatureWeight) = this->classProbabilities(x, weightProvider); n = std::min(n, p.size()); std::sort(p.begin(), p.begin() + n, std::greater<>()); return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight}; @@ -288,7 +290,9 @@ CNaiveBayes::TDoubleDoublePr CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x, const TFeatureWeightProvider& weightProvider) const { - auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider); + TDoubleSizePrVec p; + double minFeatureWeight; + std::tie(p, minFeatureWeight) = this->classProbabilities(x, weightProvider); auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) { return p_.second == label; }); diff --git a/lib/maths/common/unittest/CNaiveBayesTest.cc b/lib/maths/common/unittest/CNaiveBayesTest.cc index e76a3fec7c..32b6319803 100644 --- a/lib/maths/common/unittest/CNaiveBayesTest.cc +++ b/lib/maths/common/unittest/CNaiveBayesTest.cc @@ -99,7 +99,9 @@ BOOST_AUTO_TEST_CASE(testClassification) { // - P(1) = (initialCount + 100) / (2*initialCount + 300) // - P(2) = (initialCount + 200) / (2*initialCount + 300) - auto[probabilities, confidence](nb.highestClassProbabilities(2, {{}, {}})); + TDoubleSizePrVec probabilities; + double 
confidence; + std::tie(probabilities, confidence) = nb.highestClassProbabilities(2, {{}, {}}); double P1{(initialCount + 100.0) / (2.0 * initialCount + 300.0)}; double P2{(initialCount + 200.0) / (2.0 * initialCount + 300.0)}; @@ -311,7 +313,9 @@ BOOST_AUTO_TEST_CASE(testExtrapolation) { auto weightProvider = [weight = CTestFeatureWeight()]() mutable->maths::common::CNaiveBayesFeatureWeight& { return weight; }; - auto[probabilities, confidence] = nb.classProbabilities({{30.0}}, weightProvider); + TDoubleSizePrVec probabilities; + double confidence; + std::tie(probabilities, confidence) = nb.classProbabilities({{30.0}}, weightProvider); LOG_DEBUG(<< "p = " << probabilities << ", confidence = " << confidence); BOOST_REQUIRE_EQUAL(2, probabilities.size()); diff --git a/lib/maths/time_series/CTrendComponent.cc b/lib/maths/time_series/CTrendComponent.cc index 1258875cf6..bd7cc194a2 100644 --- a/lib/maths/time_series/CTrendComponent.cc +++ b/lib/maths/time_series/CTrendComponent.cc @@ -821,7 +821,9 @@ CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, for (std::size_t i = 0; i < m_Levels.size(); ++i) { double dt{static_cast(time - m_TimesOfLastChange[i])}; double x{m_Levels[i] + prediction}; - auto[p, pConfidence] = m_Probability.classProbability( + double p; + double pConfidence; + std::tie(p, pConfidence) = m_Probability.classProbability( LEVEL_CHANGE_LABEL, {{dt}, {x}}, weightProvider); // Here we decide whether to increase the probability we should have // seen a step change for this rollout. If we are no longer confident From 0787ee4dbf80887d9a63a4b48548de1cb4a72e54 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 10 Nov 2023 16:28:34 +0000 Subject: [PATCH 3/6] Fix bad merge and remove C++17 dependencies --- lib/maths/common/unittest/CNaiveBayesTest.cc | 4 +++- lib/maths/time_series/unittest/CTrendComponentTest.cc | 3 ++- lib/model/unittest/CMetricPopulationDataGathererTest.cc | 6 +----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/maths/common/unittest/CNaiveBayesTest.cc b/lib/maths/common/unittest/CNaiveBayesTest.cc index 32b6319803..cc8f84f116 100644 --- a/lib/maths/common/unittest/CNaiveBayesTest.cc +++ b/lib/maths/common/unittest/CNaiveBayesTest.cc @@ -9,6 +9,7 @@ * limitation. 
*/ +#include "core/CContainerPrinter.h" #include #include #include @@ -316,7 +317,8 @@ BOOST_AUTO_TEST_CASE(testExtrapolation) { TDoubleSizePrVec probabilities; double confidence; std::tie(probabilities, confidence) = nb.classProbabilities({{30.0}}, weightProvider); - LOG_DEBUG(<< "p = " << probabilities << ", confidence = " << confidence); + LOG_DEBUG(<< "p = " << core::CContainerPrinter::print(probabilities) + << ", confidence = " << confidence); BOOST_REQUIRE_EQUAL(2, probabilities.size()); BOOST_REQUIRE_CLOSE_ABSOLUTE(0.5, probabilities[0].first, 1e-2); diff --git a/lib/maths/time_series/unittest/CTrendComponentTest.cc b/lib/maths/time_series/unittest/CTrendComponentTest.cc index 43bb0879f0..7cc2bcb82e 100644 --- a/lib/maths/time_series/unittest/CTrendComponentTest.cc +++ b/lib/maths/time_series/unittest/CTrendComponentTest.cc @@ -393,7 +393,8 @@ BOOST_AUTO_TEST_CASE(testStepChangeForecasting) { rng.generateNormalSamples(0.0, 0.25, 1, noise); double value{*level + noise[0]}; trendModel.add(time, value); - values.emplace_back().add(value); + values.emplace_back(); + values.back().add(value); if (i == *changePoint) { ++level; ++changePoint; diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index a175d9ee8a..fe7872f30d 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -980,11 +980,7 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { influencerNames, searchKey, features, startTime, 2); core_t::TTime bucketStart = startTime; -<<<<<<< HEAD - for (std::size_t i = 0u, b = 0; i < boost::size(data); ++i) { -======= - for (std::size_t i = 0; i < std::size(data); ++i) { ->>>>>>> 5441e736 ([ML] Improve forecasting for time series with step changes (#2591)) + for (std::size_t i = 0; i < boost::size(data); ++i) { if (data[i].s_Time >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; From e84ac7b06a251a4e6640e30a2718fd1b48c369a0 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Sun, 12 Nov 2023 22:33:43 +0000 Subject: [PATCH 4/6] Typo --- lib/maths/common/CNaiveBayes.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/maths/common/CNaiveBayes.cc b/lib/maths/common/CNaiveBayes.cc index b78712d393..e833fc763a 100644 --- a/lib/maths/common/CNaiveBayes.cc +++ b/lib/maths/common/CNaiveBayes.cc @@ -339,8 +339,8 @@ CNaiveBayes::classProbabilities(const TDouble1VecVec& x, // Any feature whose weight < 1 has its significance dropped in class // selection, effectively we use the w_i'th root of the log-likelihood // which tends to 1 for all values if w_i is small enough. This can be - // used to ignore features that for which x is the extreme tails of the - // class conditional distribution. + // used to ignore features for which x is the extreme tails of the class + // conditional distribution. 
double featureWeight_{featureWeight.calculate()}; for (std::size_t j = 0; j < logLikelihoods.size(); ++j) { p[j].first += featureWeight_ * logLikelihoods[j]; From e14f12081e5647e15c1af8d115980ea12d569afd Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 15 Nov 2023 14:01:07 +0000 Subject: [PATCH 5/6] Another C++17 feature --- lib/model/unittest/CMetricDataGathererTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index ff9e3ba616..45cc9d3651 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -1475,7 +1475,7 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0; i < std::size(data); ++i) { + for (std::size_t i = 0; i < boost::size(data); ++i) { if (data[i].get<0>() >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeFeatureDataPrVecPrVec featureData; From 0466432528eaf03e42c0334022487434f58d249a Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 15 Nov 2023 14:01:52 +0000 Subject: [PATCH 6/6] Update version --- docs/CHANGELOG.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index a7c3d4c8e5..0522063150 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,7 +28,7 @@ //=== Regressions -== {es} version 7.17.15 +== {es} version 7.17.16 === Enhancements