[ML] Improve forecasting for time series with step changes #2591

Merged 7 commits on Nov 2, 2023

2 changes: 2 additions & 0 deletions docs/CHANGELOG.asciidoc
@@ -33,6 +33,8 @@
=== Enhancements

* Upgrade Boost libraries to version 1.83. (See {ml-pull}2560[#2560].)
* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
issue: {ml-issue}2466[#2466].)

=== Bug Fixes

74 changes: 60 additions & 14 deletions include/maths/common/CNaiveBayes.h
@@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
TPriorPtr m_Prior;
};

//! \brief Enables using custom feature weights in class prediction.
class CNaiveBayesFeatureWeight {
public:
virtual ~CNaiveBayesFeatureWeight() = default;

//! Add the class conditional log-likelihood of a feature value for \p class_.
virtual void add(std::size_t class_, double logLikelihood) = 0;

//! Calculate the feature weight from the values added so far.
virtual double calculate() const = 0;
};
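
For illustration, a minimal sketch of a custom weight (hypothetical, not part of this diff): it reproduces the removed m_MinMaxLogLikelihoodToUseFeature cutoff using the same logistic taper the old implementation applied. It assumes <algorithm>, <cmath> and <limits> are included, and that add receives log-likelihoods normalised by the distribution's log maximum, which is what the implementation in CNaiveBayes.cc passes.

    //! Hypothetical example: smoothly drop features whose best class
    //! conditional (normalised) log-likelihood falls below a cutoff.
    class CMaxLogLikelihoodCutoffWeight : public CNaiveBayesFeatureWeight {
    public:
        explicit CMaxLogLikelihoodCutoffWeight(double cutoff) : m_Cutoff{cutoff} {}
        void add(std::size_t /*class_*/, double logLikelihood) override {
            m_MaxLogLikelihood = std::max(m_MaxLogLikelihood, logLikelihood);
        }
        double calculate() const override {
            // Tends to 0 as the maximum class log-likelihood drops below the
            // cutoff, matching the logistic taper removed from CNaiveBayes.cc.
            return CTools::logisticFunction(
                (m_MaxLogLikelihood - m_Cutoff) / std::fabs(m_Cutoff), 0.1);
        }
        //! Clear per-feature state so one instance can be reused per feature.
        void reset() { m_MaxLogLikelihood = std::numeric_limits<double>::lowest(); }

    private:
        double m_Cutoff;
        double m_MaxLogLikelihood{std::numeric_limits<double>::lowest()};
    };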

//! \brief Implements a Naive Bayes classifier.
class MATHS_COMMON_EXPORT CNaiveBayes {
public:
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleSizePr = std::pair<double, std::size_t>;
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
using TDouble1Vec = core::CSmallVector<double, 1>;
using TDouble1VecVec = std::vector<TDouble1Vec>;
using TOptionalDouble = std::optional<double>;
using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;

private:
//! \brief All features have unit weight in class prediction.
class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
public:
void add(std::size_t, double) override {}
double calculate() const override { return 1.0; }
};

//! \brief Supplies unit feature weights.
class CUnitFeatureWeightProvider {
public:
CUnitFeatureWeight& operator()() const { return m_UnitWeight; }

private:
mutable CUnitFeatureWeight m_UnitWeight;
};

public:
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
double decayRate = 0.0,
TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser);
@@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
//! Check if any training data has been added, i.e. the classifier is initialized.
bool initialized() const;

//! Get the number of classes.
std::size_t numberClasses() const;

//! This can be used to optionally seed the class counts
//! with \p counts. These are added on to data class counts
//! to compute the class posterior probabilities.
@@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
//!
//! \param[in] n The number of class probabilities to estimate.
//! \param[in] x The feature values.
//! \param[in] weightProvider Supplies a feature weight computed from the
//! class conditional log-likelihood of the feature value. The weight
//! should be in the range [0,1]; the smaller the value, the less impact
//! the feature has on class selection.
//! \return The class probabilities and the minimum feature weight.
//! \note The size of \p x should equal the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
TDoubleSizePrVecDoublePr highestClassProbabilities(
std::size_t n,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;

//! Get the probability of the class labeled \p label for \p x.
//!
//! \param[in] label The label of the class of interest.
//! \param[in] x The feature values.
//! \param[in] weightProvider Supplies a feature weight computed from the
//! class conditional log-likelihood of the feature value. The weight
//! should be in the range [0,1]; the smaller the value, the less impact
//! the feature has on class selection.
//! \return The probability of the class labeled \p label and the minimum
//! feature weight.
//! \note The size of \p x should equal the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
double classProbability(std::size_t label, const TDouble1VecVec& x) const;
TDoubleDoublePr classProbability(std::size_t label,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider =
CUnitFeatureWeightProvider{}) const;

//! Get the probabilities of all the classes for \p x.
//!
//! \param[in] x The feature values.
//! \param[in] weightProvider Supplies a feature weight computed from the
//! class conditional log-likelihood of the feature value. The weight
//! should be in the range [0,1]; the smaller the value, the less impact
//! the feature has on class selection.
//! \return The class probabilities and the minimum feature weight.
//! \note The size of \p x should equal the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
TDoubleSizePrVecDoublePr
classProbabilities(const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
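
For illustration, a caller-side sketch (hypothetical, not part of this diff) of supplying a custom weight through TFeatureWeightProvider. It assumes the CMaxLogLikelihoodCutoffWeight example above, a trained CNaiveBayes instance classifier and feature values x; since the implementation requests the weight object once per feature, a stateful weight must be reset by the provider.

    // Hypothetical usage: -10.0 is an assumed cutoff, not a tested value.
    CMaxLogLikelihoodCutoffWeight weight{-10.0};
    auto provider = [&weight]() -> CNaiveBayesFeatureWeight& {
        weight.reset(); // clear accumulated state before each feature
        return weight;
    };
    auto [probabilities, minFeatureWeight] = classifier.classProbabilities(x, provider);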

//! Debug the memory used by this object.
void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
@@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
bool validate(const TDouble1VecVec& x) const;

private:
//! It is not always appropriate to use features with very low
//! probability in all classes to discriminate: the class choice
//! will be very sensitive to the underlying conditional density
//! model. This is a cutoff (for the minimum maximum class log
//! likelihood) in order to use a feature.
TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;

//! Controls the rate at which data are aged out.
double m_DecayRate;

3 changes: 1 addition & 2 deletions lib/core/CStateRestoreTraverser.cc
@@ -18,8 +18,7 @@ namespace core {
CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
}

CStateRestoreTraverser::~CStateRestoreTraverser() {
}
CStateRestoreTraverser::~CStateRestoreTraverser() = default;

bool CStateRestoreTraverser::haveBadState() const {
return m_BadState;
102 changes: 52 additions & 50 deletions lib/maths/common/CNaiveBayes.cc
@@ -40,8 +40,6 @@ namespace {
const core::TPersistenceTag PRIOR_TAG{"a", "prior"};
const core::TPersistenceTag CLASS_LABEL_TAG{"b", "class_label"};
const core::TPersistenceTag CLASS_MODEL_TAG{"c", "class_model"};
const core::TPersistenceTag MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{
"d", "min_max_likelihood_to_use_feature"};
const core::TPersistenceTag COUNT_TAG{"e", "count"};
const core::TPersistenceTag CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f", "conditional_density_from_prior"};
}
@@ -141,27 +139,26 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const {
return result;
}

CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
double decayRate,
TOptionalDouble minMaxLogLikelihoodToUseFeature)
: m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature},
m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate)
: m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
}

CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser)
: m_DecayRate{params.s_DecayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
if (traverser.traverseSubLevel([&](auto& traverser_) {
// If we persist before we create class conditional distributions we will
// not have anything to restore and hasSubLevel will be false. Trying to
// restore sets the traverser state to bad so we need to handle this case explicitly.
if (traverser.hasSubLevel() && traverser.traverseSubLevel([&](auto& traverser_) {
return this->acceptRestoreTraverser(params, traverser_);
}) == false) {
traverser.setBadState();
}
}

CNaiveBayes::CNaiveBayes(const CNaiveBayes& other)
: m_MinMaxLogLikelihoodToUseFeature{other.m_MinMaxLogLikelihoodToUseFeature},
m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
: m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
for (const auto& class_ : other.m_ClassConditionalDensities) {
m_ClassConditionalDensities.emplace(class_.first, class_.second);
}
@@ -178,9 +175,6 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& param
return class_.acceptRestoreTraverser(params, traverser_);
}),
m_ClassConditionalDensities.emplace(label, std::move(class_)))
RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value,
core::CStringUtils::stringToType(traverser.value(), value),
m_MinMaxLogLikelihoodToUseFeature.emplace(value))
} while (traverser.next());
return true;
}
@@ -203,12 +197,6 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c
class_->second.acceptPersistInserter(inserter_);
});
}

if (m_MinMaxLogLikelihoodToUseFeature) {
inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG,
*m_MinMaxLogLikelihoodToUseFeature,
core::CIEEE754::E_SinglePrecision);
}
}

CNaiveBayes& CNaiveBayes::operator=(const CNaiveBayes& other) {
@@ -223,26 +211,29 @@ void CNaiveBayes::swap(CNaiveBayes& other) {
std::swap(m_DecayRate, other.m_DecayRate);
m_Exemplar.swap(other.m_Exemplar);
m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities);
std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature);
}

bool CNaiveBayes::initialized() const {
return m_ClassConditionalDensities.size() > 0 &&
return m_ClassConditionalDensities.empty() == false &&
std::all_of(m_ClassConditionalDensities.begin(),
m_ClassConditionalDensities.end(),
[](const std::pair<std::size_t, CClass>& class_) {
return class_.second.initialized();
});
}

std::size_t CNaiveBayes::numberClasses() const {
return m_ClassConditionalDensities.size();
}

void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) {
for (const auto& count : counts) {
m_ClassConditionalDensities.emplace(count.second, CClass{count.first});
}
}

void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) {
if (!this->validate(x)) {
if (this->validate(x) == false) {
return;
}

Expand All @@ -257,7 +248,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec&

bool updateCount{false};
for (std::size_t i = 0; i < x.size(); ++i) {
if (x[i].size() > 0) {
if (x[i].empty() == false) {
class_.conditionalDensities()[i]->add(x[i]);
updateCount = true;
}
@@ -288,62 +279,74 @@ void CNaiveBayes::propagateForwardsByTime(double time) {
}
}

CNaiveBayes::TDoubleSizePrVec
CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const {
TDoubleSizePrVec p(this->classProbabilities(x));
CNaiveBayes::TDoubleSizePrVecDoublePr
CNaiveBayes::highestClassProbabilities(std::size_t n,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider) const {
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
n = std::min(n, p.size());
std::sort(p.begin(), p.begin() + n, std::greater<>());
return TDoubleSizePrVec{p.begin(), p.begin() + n};
return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight};
}

double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const {
TDoubleSizePrVec p(this->classProbabilities(x));
CNaiveBayes::TDoubleDoublePr
CNaiveBayes::classProbability(std::size_t label,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider) const {
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) {
return p_.second == label;
});
return i == p.end() ? 0.0 : i->first;
return {i == p.end() ? 0.0 : i->first, minFeatureWeight};
}

CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const {
if (!this->validate(x)) {
return {};
CNaiveBayes::TDoubleSizePrVecDoublePr
CNaiveBayes::classProbabilities(const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider) const {
if (this->validate(x) == false) {
return {{}, 0.0};
}
if (m_ClassConditionalDensities.empty()) {
LOG_ERROR(<< "Trying to compute class probabilities without supplying training data");
return {};
return {{}, 0.0};
}

using TDoubleVec = std::vector<double>;
using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;

TDoubleSizePrVec p;
p.reserve(m_ClassConditionalDensities.size());
for (const auto& class_ : m_ClassConditionalDensities) {
p.emplace_back(CTools::fastLog(class_.second.count()), class_.first);
}
double minFeatureWeight{1.0};

TDoubleVec logLikelihoods;
for (std::size_t i = 0; i < x.size(); ++i) {
if (x[i].size() > 0) {
TMaxAccumulator maxLogLikelihood;
if (x[i].empty() == false) {
auto& featureWeight = weightProvider();
logLikelihoods.clear();
for (const auto& class_ : m_ClassConditionalDensities) {
const auto& density = class_.second.conditionalDensities()[i];
double logLikelihood{density->logValue(x[i])};
double logMaximumLikelihood{density->logMaximumValue()};
maxLogLikelihood.add(logLikelihood - logMaximumLikelihood);
logLikelihoods.push_back(logLikelihood);
featureWeight.add(class_.first, logLikelihood - logMaximumLikelihood);
}
double weight{1.0};
if (m_MinMaxLogLikelihoodToUseFeature) {
weight = CTools::logisticFunction(
(maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) /
std::fabs(*m_MinMaxLogLikelihoodToUseFeature),
0.1);
}

// We compute the class c_i probability using
//
// p(c_i | x) = exp(sum_j{w_j * log(L(x_j | c_i))}) * p(c_i) / Z.
//
// Any feature whose weight < 1 has its significance dropped in class
// selection: effectively we use the w_j'th root of the likelihood, which
// tends to 1 for all values as w_j becomes small. This can be used to
// ignore features for which x is in the extreme tails of the class
// conditional distribution.
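//
// For example (illustrative numbers only): if a feature receives weight
// w_j = 0.2, a class log-likelihood of -10 contributes just -2 to that
// class's score; as w_j -> 0 every class's contribution from the feature
// tends to 0, so the feature is effectively ignored.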
double featureWeight_{featureWeight.calculate()};
for (std::size_t j = 0; j < logLikelihoods.size(); ++j) {
p[j].first += weight * logLikelihoods[j];
p[j].first += featureWeight_ * logLikelihoods[j];
}
minFeatureWeight = std::min(minFeatureWeight, featureWeight_);
}
}

Expand All @@ -357,7 +360,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV
pc.first /= Z;
}

return p;
return {std::move(p), minFeatureWeight};
}

void CNaiveBayes::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
Expand All @@ -372,7 +375,6 @@ std::size_t CNaiveBayes::memoryUsage() const {
}

std::uint64_t CNaiveBayes::checksum(std::uint64_t seed) const {
CChecksum::calculate(seed, m_MinMaxLogLikelihoodToUseFeature);
CChecksum::calculate(seed, m_DecayRate);
CChecksum::calculate(seed, m_Exemplar);
return CChecksum::calculate(seed, m_ClassConditionalDensities);
@@ -394,7 +396,7 @@ std::string CNaiveBayes::print() const {
bool CNaiveBayes::validate(const TDouble1VecVec& x) const {
auto class_ = m_ClassConditionalDensities.begin();
if (class_ != m_ClassConditionalDensities.end() &&
class_->second.conditionalDensities().size() > 0 &&
class_->second.conditionalDensities().empty() == false &&
class_->second.conditionalDensities().size() != x.size()) {
LOG_ERROR(<< "Unexpected feature vector: " << x);
return false;
@@ -431,7 +433,7 @@ bool CNaiveBayes::CClass::acceptRestoreTraverser(const SDistributionRestoreParam
void CNaiveBayes::CClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision);
for (const auto& density : m_ConditionalDensities) {
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get())) {
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get()) != nullptr) {
inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG,
[&density](auto& inserter_) {
density->acceptPersistInserter(inserter_);