diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 4eb6042e8c..ec93e71152 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,8 @@ === Enhancements * Upgrade Boost libraries to version 1.83. (See {ml-pull}2560[#2560].) +* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591], + issue: {ml-issue}2466[#2466].) === Bug Fixes diff --git a/include/maths/common/CNaiveBayes.h b/include/maths/common/CNaiveBayes.h index 180f5c45f3..9abb686522 100644 --- a/include/maths/common/CNaiveBayes.h +++ b/include/maths/common/CNaiveBayes.h @@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final TPriorPtr m_Prior; }; +//! \brief Enables using custom feature weights in class prediction. +class CNaiveBayesFeatureWeight { +public: + virtual ~CNaiveBayesFeatureWeight() = default; + virtual void add(std::size_t class_, double logLikelihood) = 0; + virtual double calculate() const = 0; +}; + //! \brief Implements a Naive Bayes classifier. class MATHS_COMMON_EXPORT CNaiveBayes { public: + using TDoubleDoublePr = std::pair<double, double>; using TDoubleSizePr = std::pair<double, std::size_t>; using TDoubleSizePrVec = std::vector<TDoubleSizePr>; + using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>; using TDouble1Vec = core::CSmallVector<double, 1>; using TDouble1VecVec = std::vector<TDouble1Vec>; - using TOptionalDouble = std::optional<double>; + using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>; + +private: + //! \brief All features have unit weight in class prediction. + class CUnitFeatureWeight : public CNaiveBayesFeatureWeight { + public: + void add(std::size_t, double) override {} + double calculate() const override { return 1.0; } + }; + + class CUnitFeatureWeightProvider { + public: + CUnitFeatureWeight& operator()() const { return m_UnitWeight; } + + private: + mutable CUnitFeatureWeight m_UnitWeight; + }; public: - explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, - double decayRate = 0.0, - TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble()); + explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0); CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); @@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes { //! Check if any training data has been added. bool initialized() const; + //! Get the number of classes. + std::size_t numberClasses() const; + //! This can be used to optionally seed the class counts //! with \p counts. These are added on to data class counts //! to compute the class posterior probabilities. @@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes { //! //! \param[in] n The number of class probabilities to estimate. //! \param[in] x The feature values. + //! \param[in] weightProvider Computes a feature weight from the class + //! conditional log-likelihood of the feature value. It should be in + //! the range [0,1]. The smaller the value, the less impact the feature + //! has on class selection. + //! \return The class probabilities and the minimum feature weight. //! \note \p x size should be equal to the number of features. //! A missing feature is indicated by passing an empty vector //! for that feature. - TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const; + TDoubleSizePrVecDoublePr highestClassProbabilities( + std::size_t n, + const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const; //!
Get the probability of the class labeled \p label for \p x. //! //! \param[in] label The label of the class of interest. //! \param[in] x The feature values. + //! \param[in] weightProvider Computes a feature weight from the class + //! conditional log-likelihood of the feature value. It should be in + //! the range [0,1]. The smaller the value, the less impact the feature + //! has on class selection. + //! \return The class probability and the minimum feature weight. //! \note \p x size should be equal to the number of features. //! A missing feature is indicated by passing an empty vector //! for that feature. - double classProbability(std::size_t label, const TDouble1VecVec& x) const; + TDoubleDoublePr classProbability(std::size_t label, + const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider = + CUnitFeatureWeightProvider{}) const; //! Get the probabilities of all the classes for \p x. //! //! \param[in] x The feature values. + //! \param[in] weightProvider Computes a feature weight from the class + //! conditional log-likelihood of the feature value. It should be in + //! the range [0,1]. The smaller the value, the less impact the feature + //! has on class selection. + //! \return The class probabilities and the minimum feature weight. //! \note \p x size should be equal to the number of features. //! A missing feature is indicated by passing an empty vector //! for that feature. - TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const; + TDoubleSizePrVecDoublePr + classProbabilities(const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const; //! Debug the memory used by this object. void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; @@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes { bool validate(const TDouble1VecVec& x) const; private: - //! It is not always appropriate to use features with very low - //! probability in all classes to discriminate: the class choice - //! will be very sensitive to the underlying conditional density - //! model. This is a cutoff (for the minimum maximum class log - //! likelihood) in order to use a feature. - TOptionalDouble m_MinMaxLogLikelihoodToUseFeature; - //! Controls the rate at which data are aged out. double m_DecayRate;
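For context, here is a minimal sketch of how a caller can plug a custom weight into this API. CLogisticFeatureWeight and the -20.0 cutoff are illustrative assumptions chosen to roughly mimic the removed m_MinMaxLogLikelihoodToUseFeature behaviour; only CNaiveBayesFeatureWeight and TFeatureWeightProvider come from the header above.

    // Illustrative sketch, not part of this patch: downweight a feature when
    // its best class conditional log-likelihood is small, i.e. when the
    // feature value is implausible under every class.
    #include <maths/common/CNaiveBayes.h>

    #include <algorithm>
    #include <cmath>
    #include <limits>

    class CLogisticFeatureWeight : public ml::maths::common::CNaiveBayesFeatureWeight {
    public:
        void add(std::size_t /*class_*/, double logLikelihood) override {
            // Track the best class conditional log-likelihood of the feature.
            m_MaxLogLikelihood = std::max(m_MaxLogLikelihood, logLikelihood);
        }
        double calculate() const override {
            // Drops smoothly from 1 to 0 as the maximum log-likelihood falls
            // through the (assumed) cutoff of -20.
            return 1.0 / (1.0 + std::exp(-(m_MaxLogLikelihood + 20.0)));
        }

    private:
        double m_MaxLogLikelihood{std::numeric_limits<double>::lowest()};
    };

The provider is called once per feature, so it hands back a freshly reset weight each time; here classifier stands for a trained CNaiveBayes:

    auto weightProvider = [weight = CLogisticFeatureWeight{}]() mutable
        -> ml::maths::common::CNaiveBayesFeatureWeight& {
        weight = CLogisticFeatureWeight{};
        return weight;
    };
    auto [p, minWeight] = classifier.classProbabilities(x, weightProvider);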
diff --git a/lib/core/CStateRestoreTraverser.cc b/lib/core/CStateRestoreTraverser.cc index bad15cea60..a78ed3261f 100644 --- a/lib/core/CStateRestoreTraverser.cc +++ b/lib/core/CStateRestoreTraverser.cc @@ -18,8 +18,7 @@ namespace core { CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) { } -CStateRestoreTraverser::~CStateRestoreTraverser() { -} +CStateRestoreTraverser::~CStateRestoreTraverser() = default; bool CStateRestoreTraverser::haveBadState() const { return m_BadState; diff --git a/lib/maths/common/CNaiveBayes.cc b/lib/maths/common/CNaiveBayes.cc index d1a23cba38..9c29cefcc1 100644 --- a/lib/maths/common/CNaiveBayes.cc +++ b/lib/maths/common/CNaiveBayes.cc @@ -40,8 +40,6 @@ namespace { const core::TPersistenceTag PRIOR_TAG{"a", "prior"}; const core::TPersistenceTag CLASS_LABEL_TAG{"b", "class_label"}; const core::TPersistenceTag CLASS_MODEL_TAG{"c", "class_model"}; -const core::TPersistenceTag MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{ "d", "min_max_likelihood_to_use_feature"}; const core::TPersistenceTag COUNT_TAG{"e", "count"}; const core::TPersistenceTag CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f", "conditional_density_from_prior"}; } @@ -141,18 +139,18 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const { return result; } -CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, - double decayRate, - TOptionalDouble minMaxLogLikelihoodToUseFeature) - : m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature}, - m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} { +CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate) : m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} { } CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, const SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser) : m_DecayRate{params.s_DecayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} { - if (traverser.traverseSubLevel([&](auto& traverser_) { + // If we persist before we create class conditional distributions we will + // not have anything to restore and hasSubLevel will be false. Trying to + // restore sets the traverser state to bad so we need to handle this explicitly.
+ if (traverser.hasSubLevel() && traverser.traverseSubLevel([&](auto& traverser_) { return this->acceptRestoreTraverser(params, traverser_); }) == false) { traverser.setBadState(); @@ -160,8 +158,7 @@ CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, } CNaiveBayes::CNaiveBayes(const CNaiveBayes& other) - : m_MinMaxLogLikelihoodToUseFeature{other.m_MinMaxLogLikelihoodToUseFeature}, - m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} { + : m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} { for (const auto& class_ : other.m_ClassConditionalDensities) { m_ClassConditionalDensities.emplace(class_.first, class_.second); } @@ -178,9 +175,6 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& param return class_.acceptRestoreTraverser(params, traverser_); }), m_ClassConditionalDensities.emplace(label, std::move(class_))) - RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value, core::CStringUtils::stringToType(traverser.value(), value), m_MinMaxLogLikelihoodToUseFeature.emplace(value)) } while (traverser.next()); return true; } @@ -203,12 +197,6 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c class_->second.acceptPersistInserter(inserter_); }); } - - if (m_MinMaxLogLikelihoodToUseFeature) { - inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, - *m_MinMaxLogLikelihoodToUseFeature, - core::CIEEE754::E_SinglePrecision); - } } CNaiveBayes& CNaiveBayes::operator=(const CNaiveBayes& other) { @@ -223,11 +211,10 @@ void CNaiveBayes::swap(CNaiveBayes& other) { std::swap(m_DecayRate, other.m_DecayRate); m_Exemplar.swap(other.m_Exemplar); m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities); - std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature); } bool CNaiveBayes::initialized() const { - return m_ClassConditionalDensities.size() > 0 && + return m_ClassConditionalDensities.empty() == false && std::all_of(m_ClassConditionalDensities.begin(), m_ClassConditionalDensities.end(), [](const std::pair<std::size_t, CClass>& class_) { @@ -235,6 +222,10 @@ bool CNaiveBayes::initialized() const { }); } +std::size_t CNaiveBayes::numberClasses() const { + return m_ClassConditionalDensities.size(); +} + void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) { for (const auto& count : counts) { m_ClassConditionalDensities.emplace(count.second, CClass{count.first}); @@ -242,7 +233,7 @@ void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) { } void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) { - if (!this->validate(x)) { + if (this->validate(x) == false) { return; } @@ -257,7 +248,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& bool updateCount{false}; for (std::size_t i = 0; i < x.size(); ++i) { - if (x[i].size() > 0) { + if (x[i].empty() == false) { class_.conditionalDensities()[i]->add(x[i]); updateCount = true; } @@ -288,62 +279,74 @@ void CNaiveBayes::propagateForwardsByTime(double time) { } } -CNaiveBayes::TDoubleSizePrVec -CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const { - TDoubleSizePrVec p(this->classProbabilities(x)); +CNaiveBayes::TDoubleSizePrVecDoublePr +CNaiveBayes::highestClassProbabilities(std::size_t n, + const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider) const { + auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider); n = std::min(n, p.size());
std::sort(p.begin(), p.begin() + n, std::greater<>()); - return TDoubleSizePrVec{p.begin(), p.begin() + n}; + return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight}; } -double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const { - TDoubleSizePrVec p(this->classProbabilities(x)); +CNaiveBayes::TDoubleDoublePr +CNaiveBayes::classProbability(std::size_t label, + const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider) const { + auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider); auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) { return p_.second == label; }); - return i == p.end() ? 0.0 : i->first; + return {i == p.end() ? 0.0 : i->first, minFeatureWeight}; } -CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const { - if (!this->validate(x)) { - return {}; +CNaiveBayes::TDoubleSizePrVecDoublePr +CNaiveBayes::classProbabilities(const TDouble1VecVec& x, + const TFeatureWeightProvider& weightProvider) const { + if (this->validate(x) == false) { + return {{}, 0.0}; } if (m_ClassConditionalDensities.empty()) { LOG_ERROR(<< "Trying to compute class probabilities without supplying training data"); - return {{}, 0.0}; + return {{}, 0.0}; } using TDoubleVec = std::vector<double>; - using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator; TDoubleSizePrVec p; p.reserve(m_ClassConditionalDensities.size()); for (const auto& class_ : m_ClassConditionalDensities) { p.emplace_back(CTools::fastLog(class_.second.count()), class_.first); } + double minFeatureWeight{1.0}; TDoubleVec logLikelihoods; for (std::size_t i = 0; i < x.size(); ++i) { - if (x[i].size() > 0) { - TMaxAccumulator maxLogLikelihood; + if (x[i].empty() == false) { + auto& featureWeight = weightProvider(); logLikelihoods.clear(); for (const auto& class_ : m_ClassConditionalDensities) { const auto& density = class_.second.conditionalDensities()[i]; double logLikelihood{density->logValue(x[i])}; double logMaximumLikelihood{density->logMaximumValue()}; - maxLogLikelihood.add(logLikelihood - logMaximumLikelihood); logLikelihoods.push_back(logLikelihood); + featureWeight.add(class_.first, logLikelihood - logMaximumLikelihood); } - double weight{1.0}; - if (m_MinMaxLogLikelihoodToUseFeature) { - weight = CTools::logisticFunction( - (maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) / - std::fabs(*m_MinMaxLogLikelihoodToUseFeature), - 0.1); - } + + // We compute the class c_i probability using + // + // p(c_i | x) = exp(sum_j{w_j * log(L(x_j | c_i))}) / Z * p(c_i). + // + // Any feature whose weight w_j < 1 has its significance dropped in class + // selection; effectively we use the w_j'th root of the likelihood, + // which tends to 1 for all values if w_j is small enough. This can be + // used to ignore features for which x is in the extreme tails of the + // class conditional distribution.
+ double featureWeight_{featureWeight.calculate()}; for (std::size_t j = 0; j < logLikelihoods.size(); ++j) { - p[j].first += weight * logLikelihoods[j]; + p[j].first += featureWeight_ * logLikelihoods[j]; } + minFeatureWeight = std::min(minFeatureWeight, featureWeight_); } } @@ -357,7 +360,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV pc.first /= Z; } - return p; + return {std::move(p), minFeatureWeight}; } void CNaiveBayes::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { @@ -372,7 +375,6 @@ std::size_t CNaiveBayes::memoryUsage() const { } std::uint64_t CNaiveBayes::checksum(std::uint64_t seed) const { - CChecksum::calculate(seed, m_MinMaxLogLikelihoodToUseFeature); CChecksum::calculate(seed, m_DecayRate); CChecksum::calculate(seed, m_Exemplar); return CChecksum::calculate(seed, m_ClassConditionalDensities); @@ -394,7 +396,7 @@ std::string CNaiveBayes::print() const { bool CNaiveBayes::validate(const TDouble1VecVec& x) const { auto class_ = m_ClassConditionalDensities.begin(); if (class_ != m_ClassConditionalDensities.end() && - class_->second.conditionalDensities().size() > 0 && + class_->second.conditionalDensities().empty() == false && class_->second.conditionalDensities().size() != x.size()) { LOG_ERROR(<< "Unexpected feature vector: " << x); return false; @@ -431,7 +433,7 @@ bool CNaiveBayes::CClass::acceptRestoreTraverser(const SDistributionRestoreParam void CNaiveBayes::CClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const { inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision); for (const auto& density : m_ConditionalDensities) { - if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get())) { + if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get()) != nullptr) { inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG, [&density](auto& inserter_) { density->acceptPersistInserter(inserter_);
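To make the weighting arithmetic concrete, the following self-contained sketch (illustrative, not part of this patch) reproduces the posterior computation above for two equally likely classes and a single feature; the production code additionally works with log class counts and, like here, normalises by Z at the end.

    #include <cmath>
    #include <iostream>
    #include <vector>

    // p(c_i | x) is proportional to p(c_i) * exp(w * log L(x | c_i)). With
    // w = 1 this is plain Naive Bayes; as w -> 0 the posterior tends to the
    // class priors, which is the behaviour testExtrapolation below asserts.
    int main() {
        std::vector<double> priors{0.5, 0.5};
        std::vector<double> logLikelihoods{-2.0, -40.0}; // x is implausible for class 1.
        for (double w : {1.0, 0.5, 1e-3}) {
            std::vector<double> p(2);
            double Z{0.0};
            for (std::size_t i = 0; i < 2; ++i) {
                p[i] = priors[i] * std::exp(w * logLikelihoods[i]);
                Z += p[i];
            }
            // w = 1 -> {~1, ~3e-17}; w = 1e-3 -> {~0.51, ~0.49}.
            std::cout << "w = " << w << ": p = {" << p[0] / Z << ", " << p[1] / Z << "}\n";
        }
        return 0;
    }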
diff --git a/lib/maths/common/unittest/CLbfgsTest.cc b/lib/maths/common/unittest/CLbfgsTest.cc index f7de9827e2..38076718c4 100644 --- a/lib/maths/common/unittest/CLbfgsTest.cc +++ b/lib/maths/common/unittest/CLbfgsTest.cc @@ -228,12 +228,12 @@ BOOST_AUTO_TEST_CASE(testConstrainedMinimize) { std::tie(x, fx) = lbfgs.constrainedMinimize(f, g, a, b, x0, 0.2); BOOST_REQUIRE_EQUAL(fx, static_cast<double>(f(x))); - BOOST_REQUIRE_CLOSE_ABSOLUTE(static_cast<double>(f(xmin)), fx, 1e-3); + BOOST_REQUIRE_CLOSE_ABSOLUTE(static_cast<double>(f(xmin)), fx, 5e-3); ferror += std::fabs(fx - f(xmin)) / 100.0; } - BOOST_REQUIRE_CLOSE_ABSOLUTE(0.0, ferror, 1e-5); + BOOST_REQUIRE_CLOSE_ABSOLUTE(0.0, ferror, 5e-5); } BOOST_AUTO_TEST_CASE(testMinimizeWithVerySmallGradient) { diff --git a/lib/maths/common/unittest/CNaiveBayesTest.cc b/lib/maths/common/unittest/CNaiveBayesTest.cc index 535ff1e3c7..280e87c467 100644 --- a/lib/maths/common/unittest/CNaiveBayesTest.cc +++ b/lib/maths/common/unittest/CNaiveBayesTest.cc @@ -29,11 +29,13 @@ #include #include +namespace { BOOST_AUTO_TEST_SUITE(CNaiveBayesTest) using namespace ml; using TDoubleVec = std::vector<double>; +using TDoubleVecVec = std::vector<TDoubleVec>; using TDouble1Vec = core::CSmallVector<double, 1>; using TDouble1VecVec = std::vector<TDouble1Vec>; using TDoubleSizePr = std::pair<double, std::size_t>; @@ -41,6 +43,12 @@ using TDoubleSizePrVec = std::vector<TDoubleSizePr>; using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<double>::TAccumulator; using TMeanVarAccumulator = maths::common::CBasicStatistics::SSampleMeanVar<double>::TAccumulator; +class CTestFeatureWeight : public maths::common::CNaiveBayesFeatureWeight { +public: + void add(std::size_t, double) override {} + double calculate() const override { return 1e-3; } +}; + BOOST_AUTO_TEST_CASE(testClassification) { // We'll test classification using Gaussian naive Bayes. We // test: @@ -56,7 +64,7 @@ BOOST_AUTO_TEST_CASE(testClassification) { test::CRandomNumbers rng; - TDoubleVec trainingData[4]; + TDoubleVecVec trainingData(4); rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]); @@ -92,7 +100,7 @@ BOOST_AUTO_TEST_CASE(testClassification) { // - P(1) = (initialCount + 100) / (2*initialCount + 300) // - P(2) = (initialCount + 200) / (2*initialCount + 300) - TDoubleSizePrVec probabilities(nb.highestClassProbabilities(2, {{}, {}})); + auto[probabilities, confidence](nb.highestClassProbabilities(2, {{}, {}})); double P1{(initialCount + 100.0) / (2.0 * initialCount + 300.0)}; double P2{(initialCount + 200.0) / (2.0 * initialCount + 300.0)}; @@ -157,19 +165,22 @@ BOOST_AUTO_TEST_CASE(testClassification) { maths::common::CTools::safePdf(class1[1], xtest[i + 1])}; double p2{P2 * maths::common::CTools::safePdf(class2[0], xtest[i]) * maths::common::CTools::safePdf(class2[1], xtest[i + 1])}; - probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i + 1]}}); + std::tie(probabilities, confidence) = + nb.highestClassProbabilities(2, {{xtest[i]}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[0]); // Miss out the first feature value. p1 = P1 * maths::common::CTools::safePdf(class1[1], xtest[i + 1]); p2 = P2 * maths::common::CTools::safePdf(class2[1], xtest[i + 1]); - probabilities = nb.highestClassProbabilities(2, {{}, {xtest[i + 1]}}); + std::tie(probabilities, confidence) = + nb.highestClassProbabilities(2, {{}, {xtest[i + 1]}}); test(p1, p2, probabilities, meanErrors[1]); // Miss out the second feature value.
p1 = P1 * maths::common::CTools::safePdf(class1[0], xtest[i]); p2 = P2 * maths::common::CTools::safePdf(class2[0], xtest[i]); - probabilities = nb.highestClassProbabilities(2, {{xtest[i]}, {}}); + std::tie(probabilities, confidence) = + nb.highestClassProbabilities(2, {{xtest[i]}, {}}); test(p1, p2, probabilities, meanErrors[2]); } @@ -194,7 +205,7 @@ BOOST_AUTO_TEST_CASE(testUninitialized) { maths::common::CNaiveBayes nb{maths::common::CNaiveBayes{ maths::common::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}}; - TDoubleVec trainingData[2]; + TDoubleVecVec trainingData(2); for (std::size_t i = 0; i < 2; ++i) { BOOST_REQUIRE_EQUAL(false, nb.initialized()); @@ -225,7 +236,7 @@ BOOST_AUTO_TEST_CASE(testPropagationByTime) { maths::common::CNaiveBayes{ maths::common::CNaiveBayesFeatureDensityFromPrior(normal), 0.05}}; - TDoubleVec trainingData[4]; + TDoubleVecVec trainingData(4); for (std::size_t i = 0; i < 1000; ++i) { double x{static_cast<double>(i)}; rng.generateNormalSamples(0.02 * x - 14.0, 16.0, 1, trainingData[0]); @@ -248,8 +259,8 @@ { TDoubleSizePrVec probabilities[]{ - nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}), - nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}})}; + nb[0].highestClassProbabilities(2, {{-10.0}, {-10.0}}).first, + nb[1].highestClassProbabilities(2, {{-10.0}, {-10.0}}).first}; LOG_DEBUG(<< "Aged class probabilities = " << probabilities[0]); LOG_DEBUG(<< "Class probabilities = " << probabilities[1]); BOOST_REQUIRE_EQUAL(2, probabilities[0][0].second); @@ -259,8 +270,8 @@ } { TDoubleSizePrVec probabilities[]{ - nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}), - nb[1].highestClassProbabilities(2, {{10.0}, {10.0}})}; + nb[0].highestClassProbabilities(2, {{10.0}, {10.0}}).first, + nb[1].highestClassProbabilities(2, {{10.0}, {10.0}}).first}; LOG_DEBUG(<< "Aged class probabilities = " << probabilities[0]); LOG_DEBUG(<< "Class probabilities = " << probabilities[1]); BOOST_REQUIRE_EQUAL(1, probabilities[0][0].second); @@ -270,6 +281,42 @@ } } +BOOST_AUTO_TEST_CASE(testExtrapolation) { + // Test that: + // 1. Applying low feature weights means the conditional probabilities + // of all classes tend to 1 / "number of classes", + // 2. The number returned as the confidence in the probabilities + // is the minimum feature weight.
+ + test::CRandomNumbers rng; + + TDoubleVecVec trainingData(2); + rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); + rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); + + maths::common::CNormalMeanPrecConjugate normal{ + maths::common::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)}; + maths::common::CNaiveBayes nb{maths::common::CNaiveBayesFeatureDensityFromPrior(normal)}; + + for (auto x : trainingData[0]) { + nb.addTrainingDataPoint(0, {{x}}); + } + for (auto x : trainingData[1]) { + nb.addTrainingDataPoint(1, {{x}}); + } + + auto weightProvider = [weight = CTestFeatureWeight()]() mutable->maths::common::CNaiveBayesFeatureWeight& { + return weight; + }; + auto[probabilities, confidence] = nb.classProbabilities({{30.0}}, weightProvider); + LOG_DEBUG(<< "p = " << probabilities << ", confidence = " << confidence); + + BOOST_REQUIRE_EQUAL(2, probabilities.size()); + BOOST_REQUIRE_CLOSE_ABSOLUTE(0.5, probabilities[0].first, 1e-2); + BOOST_REQUIRE_CLOSE_ABSOLUTE(0.5, probabilities[1].first, 1e-2); + BOOST_REQUIRE_EQUAL(1e-3, confidence); +} + BOOST_AUTO_TEST_CASE(testMemoryUsage) { // Check invariants. @@ -313,7 +360,7 @@ BOOST_AUTO_TEST_CASE(testMemoryUsage) { BOOST_AUTO_TEST_CASE(testPersist) { test::CRandomNumbers rng; - TDoubleVec trainingData[4]; + TDoubleVecVec trainingData(4); rng.generateNormalSamples(0.0, 12.0, 100, trainingData[0]); rng.generateNormalSamples(10.0, 16.0, 100, trainingData[1]); rng.generateNormalSamples(3.0, 14.0, 200, trainingData[2]); @@ -364,3 +411,4 @@ } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/maths/time_series/CTrendComponent.cc b/lib/maths/time_series/CTrendComponent.cc index 1b7f676305..78f9a27216 100644 --- a/lib/maths/time_series/CTrendComponent.cc +++ b/lib/maths/time_series/CTrendComponent.cc @@ -52,6 +52,27 @@ const core_t::TTime UNSET_TIME{0}; const std::size_t NO_CHANGE_LABEL{0}; const std::size_t LEVEL_CHANGE_LABEL{1}; +class CChangeForecastFeatureWeight : public common::CNaiveBayesFeatureWeight { +public: + void add(std::size_t class_, double logLikelihood) override { + if (class_ == NO_CHANGE_LABEL) { + m_LogLikelihood = logLikelihood; + } + } + + double calculate() const override { + // Downweight features for which we don't have sufficient examples + // of the time series not changing. + // Note that m_LogLikelihood = -0.5 * (x - m)^2 / sigma^2, so it equals + // -4.5 when the feature value is at the 3 sigma point of the + // conditional distribution. + return common::CTools::logisticFunction((4.5 + m_LogLikelihood) / 4.5, 0.1); + } + +private: + double m_LogLikelihood{0.0}; +}; + //! Get the desired weight for the regression model.
double modelWeight(double targetDecayRate, double modelDecayRate) { return targetDecayRate == modelDecayRate @@ -90,7 +111,7 @@ common::CNaiveBayesFeatureDensityFromPrior naiveBayesExemplar(double decayRate) common::CNaiveBayes initialProbabilityOfChangeModel(double decayRate) { return common::CNaiveBayes{naiveBayesExemplar(decayRate), - TIME_SCALES[NUMBER_MODELS - 1] * decayRate, -20.0}; + TIME_SCALES[NUMBER_MODELS - 1] * decayRate}; } common::CNormalMeanPrecConjugate initialMagnitudeOfChangeModel(double decayRate) { @@ -294,11 +315,11 @@ void CTrendComponent::shiftLevel(double shift, m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, {{dt}, {value}}); } + m_TimeOfLastLevelChange = time; for (std::size_t i = segments[last]; i < values.size(); ++i, time += bucketLength) { this->dontShiftLevel(time, common::CBasicStatistics::mean(values[i])); } m_MagnitudeOfLevelChangeModel.addSamples({magnitude}, maths_t::CUnitWeights::SINGLE_UNIT); - m_TimeOfLastLevelChange = time; } void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) { @@ -786,14 +807,26 @@ CTrendComponent::TDouble3Vec CTrendComponent::CForecastLevel::forecast(core_t::TTime time, double prediction, double confidence) { TDouble3Vec result{0.0, 0.0, 0.0}; - if (m_Probability.initialized()) { + if (m_Probability.initialized() && m_Probability.numberClasses() > 1) { common::CSampling::uniformSample(0.0, 1.0, m_Levels.size(), m_Uniform01); bool reorder{false}; + auto weightProvider = [weight = + CChangeForecastFeatureWeight{}]() mutable->common::CNaiveBayesFeatureWeight& { + weight = CChangeForecastFeatureWeight{}; + return weight; + }; for (std::size_t i = 0; i < m_Levels.size(); ++i) { double dt{static_cast<double>(time - m_TimesOfLastChange[i])}; double x{m_Levels[i] + prediction}; - double p{m_Probability.classProbability(LEVEL_CHANGE_LABEL, {{dt}, {x}})}; - m_ProbabilitiesOfChange[i] = std::max(m_ProbabilitiesOfChange[i], p); + auto[p, pConfidence] = m_Probability.classProbability( + LEVEL_CHANGE_LABEL, {{dt}, {x}}, weightProvider); + // Here we decide whether to increase the probability that we should + // have seen a step change for this rollout. If we are no longer + // confident in the predicted probability we do not predict changes, + // following the principle of least surprise. + if (pConfidence > 0.5) { + m_ProbabilitiesOfChange[i] = std::max(m_ProbabilitiesOfChange[i], p); + } if (m_Uniform01[i] < m_ProbabilitiesOfChange[i]) { double stepMean{m_Magnitude.marginalLikelihoodMean()}; double stepVariance{m_Magnitude.marginalLikelihoodVariance()};
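Plugging numbers into this weight makes the gating threshold transparent. A small sketch (illustrative, not part of this patch), assuming CTools::logisticFunction(x, width) computes 1 / (1 + exp(-x / width)):

    #include <cmath>
    #include <iostream>

    // Weight CChangeForecastFeatureWeight assigns when the "no change"
    // log-likelihood corresponds to a feature value s sigmas from the mean:
    // logLikelihood = -0.5 * s^2, weight = logistic((4.5 + logLikelihood) / 4.5, 0.1).
    double changeForecastWeight(double sigmas) {
        double logLikelihood{-0.5 * sigmas * sigmas};
        return 1.0 / (1.0 + std::exp(-((4.5 + logLikelihood) / 4.5) / 0.1));
    }

    int main() {
        // 0 -> ~1.0, 2 -> ~0.996, 3 -> 0.5, 4 -> ~4e-4: the weight collapses
        // just past 3 sigma, so the pConfidence > 0.5 check above ignores the
        // level change probability whenever a feature is extrapolating.
        for (double sigmas : {0.0, 1.0, 2.0, 3.0, 4.0}) {
            std::cout << sigmas << " sigma -> " << changeForecastWeight(sigmas) << '\n';
        }
        return 0;
    }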
diff --git a/lib/maths/time_series/unittest/CTrendComponentTest.cc b/lib/maths/time_series/unittest/CTrendComponentTest.cc index 618e5104e8..e9278b0ec5 100644 --- a/lib/maths/time_series/unittest/CTrendComponentTest.cc +++ b/lib/maths/time_series/unittest/CTrendComponentTest.cc @@ -26,15 +26,15 @@ #include +#include #include #include +namespace { BOOST_AUTO_TEST_SUITE(CTrendComponentTest) using namespace ml; -namespace { - using TDoubleVec = std::vector<double>; using TDoubleVecVec = std::vector<TDoubleVec>; using TDouble1Vec = core::CSmallVector<double, 1>; @@ -223,7 +223,6 @@ auto forecastErrors(ITR actual, return std::make_pair(maths::common::CBasicStatistics::mean(meanError), maths::common::CBasicStatistics::mean(meanErrorAt95)); } -} BOOST_AUTO_TEST_CASE(testValueAndVariance) { // Check that the prediction bias is small in the long run @@ -360,6 +359,71 @@ BOOST_AUTO_TEST_CASE(testForecast) { BOOST_TEST_REQUIRE(errorAt95 < 0.001); } +BOOST_AUTO_TEST_CASE(testStepChangeForecasting) { + // A randomized test checking that forecasts of time series with step + // changes don't explode. We previously sometimes ran into issues when we + // extrapolated the feature distributions we use to predict steps. + // In such cases we would predict far too many steps, leading to + // overly wide forecast bounds and unrealistic predictions. + + using TSizeVec = std::vector<std::size_t>; + + test::CRandomNumbers rng; + double interval{20.0}; + + maths::time_series::CTrendComponent::TFloatMeanAccumulatorVec values; + + for (std::size_t t = 0; t < 100; ++t) { + TSizeVec changePoints; + rng.generateUniformSamples(0, 1000, 6, changePoints); + std::sort(changePoints.begin(), changePoints.end()); + changePoints.push_back(1000); + TDoubleVec levels; + rng.generateUniformSamples(-0.5 * interval, 0.5 * interval, 7, levels); + + maths::time_series::CTrendComponent trendModel{0.012}; + + TDoubleVec noise; + auto level = levels.begin(); + auto changePoint = changePoints.begin(); + core_t::TTime time{1672531200}; + for (std::size_t i = 0; i < 1000; ++i, time += BUCKET_LENGTH) { + rng.generateNormalSamples(0.0, 0.25, 1, noise); + double value{*level + noise[0]}; + trendModel.add(time, value); + values.emplace_back().add(value); + if (i == *changePoint) { + ++level; + ++changePoint; + double shift{*level - *(level - 1)}; + core_t::TTime valuesStartTime{ time - static_cast<core_t::TTime>(values.size()) * BUCKET_LENGTH}; + TSizeVec segments{0, *changePoint - *(changePoint - 1) - 1, *changePoint - *(changePoint - 1)}; + TDoubleVec shifts{0.0, *level - *(level - 1)}; + trendModel.shiftLevel(shift, valuesStartTime, BUCKET_LENGTH, values, segments, shifts); + values.clear(); + } else { + trendModel.dontShiftLevel(time, value); + } + } + + TDouble3VecVec forecast; + trendModel.forecast(time, time + 200 * BUCKET_LENGTH, BUCKET_LENGTH, 90.0, false, + [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, + [&forecast](core_t::TTime, const TDouble3Vec& value) { + forecast.push_back(value); + }); + + // Check that the prediction is in the switching interval and + // the forecast confidence interval isn't too wide.
+ BOOST_TEST_REQUIRE(forecast.back()[1] > -0.75 * interval); + BOOST_TEST_REQUIRE(forecast.back()[1] < 0.75 * interval); + BOOST_TEST_REQUIRE(forecast.back()[2] - forecast.back()[0] < 3.5 * interval); + } +} + BOOST_AUTO_TEST_CASE(testPersist) { // Check that serialization is idempotent. @@ -393,9 +457,9 @@ BOOST_AUTO_TEST_CASE(testPersist) { maths::common::SDistributionRestoreParams params{maths_t::E_ContinuousData, 0.1}; maths::time_series::CTrendComponent restoredComponent{0.1}; - traverser.traverseSubLevel( - std::bind(&maths::time_series::CTrendComponent::acceptRestoreTraverser, - &restoredComponent, std::cref(params), std::placeholders::_1)); + traverser.traverseSubLevel([&](auto& traverser_) { + return restoredComponent.acceptRestoreTraverser(params, traverser_); + }); BOOST_REQUIRE_EQUAL(origComponent.checksum(), restoredComponent.checksum()); @@ -440,9 +504,9 @@ BOOST_AUTO_TEST_CASE(testUpgradeTo7p1) { core::CRapidXmlParser parser; BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(xml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - traverser.traverseSubLevel( - std::bind(&maths::time_series::CTrendComponent::acceptRestoreTraverser, - &component, std::cref(params), std::placeholders::_1)); + traverser.traverseSubLevel([&](auto& traverser_) { + return component.acceptRestoreTraverser(params, traverser_); + }); test::CRandomNumbers rng; @@ -455,3 +519,4 @@ BOOST_AUTO_TEST_CASE(testUpgradeTo7p1) { } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 50a6b2ffdd..2a6609ec88 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -1476,13 +1476,13 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { TStrVec influencerNames(std::begin(influencerNames_), std::end(influencerNames_)); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, KEY, features, startTime, 2u); + influencerNames, KEY, features, startTime, 2); addPerson("p1", gatherer, m_ResourceMonitor, influencerNames.size()); addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0u, b = 0; i < std::size(data); ++i) { + for (std::size_t i = 0; i < std::size(data); ++i) { if (data[i].get<0>() >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1520,7 +1520,6 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { } bucketStart += bucketLength; - ++b; } for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { addArrival(gatherer, m_ResourceMonitor, data[i].get<0>(), diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index c4dce0d474..dea04a2880 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -975,10 +975,10 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { TStrVec influencerNames(std::begin(influencerNames_), std::end(influencerNames_)); CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, searchKey, features, startTime, 2u); + influencerNames, searchKey, features, startTime, 2); core_t::TTime 
bucketStart = startTime; - for (std::size_t i = 0u, b = 0; i < std::size(data); ++i) { + for (std::size_t i = 0; i < std::size(data); ++i) { if (data[i].s_Time >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; @@ -1016,7 +1016,6 @@ } bucketStart += bucketLength; - ++b; } addArrival(data[i], gatherer, m_ResourceMonitor); } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index fc7931d603..5bcadce756 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -56,8 +56,6 @@ class CTestFixture { CHierarchicalResults results; std::string pervasive("IShouldNotBeRemoved"); - std::size_t numBuckets = 0; - for (core_t::TTime time = firstTime; time < static_cast<core_t::TTime>(firstTime + bucketLength * buckets); time += (bucketLength / std::max(std::size_t(1), newPeoplePerBucket))) { @@ -66,7 +64,6 @@ detector.buildResults(bucketStart, bucketStart + bucketLength, results); monitor.pruneIfRequired(bucketStart); monitor.updateMoments(monitor.totalMemory(), bucketStart, bucketLength); - ++numBuckets; newBucket = true; }