[ML] Improve forecasting for time series with step changes #2591

Merged 7 commits on Nov 2, 2023

2 changes: 2 additions & 0 deletions docs/CHANGELOG.asciidoc
@@ -33,6 +33,8 @@
=== Enhancements

* Upgrade Boost libraries to version 1.83. (See {ml-pull}2560[#2560].)
* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
issue: {ml-issue}2466[#2466].)

=== Bug Fixes

74 changes: 60 additions & 14 deletions include/maths/common/CNaiveBayes.h
@@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
TPriorPtr m_Prior;
};

//! \brief Enables using custom feature weights in class prediction.
class CNaiveBayesFeatureWeight {
public:
virtual ~CNaiveBayesFeatureWeight() = default;

//! Add the class conditional log-likelihood of a feature value for \p class_.
virtual void add(std::size_t class_, double logLikelihood) = 0;

//! Calculate the feature weight from the values added so far.
virtual double calculate() const = 0;
};
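
For illustration, a minimal sketch of a custom weight (hypothetical, not part of this diff): it reproduces the removed m_MinMaxLogLikelihoodToUseFeature cutoff using the same logistic taper the old implementation applied. It assumes <algorithm>, <cmath> and <limits> are included, and that add receives log-likelihoods normalised by the distribution's log maximum, which is what the implementation in CNaiveBayes.cc passes.

    //! Hypothetical example: smoothly drop features whose best class
    //! conditional (normalised) log-likelihood falls below a cutoff.
    class CMaxLogLikelihoodCutoffWeight : public CNaiveBayesFeatureWeight {
    public:
        explicit CMaxLogLikelihoodCutoffWeight(double cutoff) : m_Cutoff{cutoff} {}
        void add(std::size_t /*class_*/, double logLikelihood) override {
            m_MaxLogLikelihood = std::max(m_MaxLogLikelihood, logLikelihood);
        }
        double calculate() const override {
            // Tends to 0 as the maximum class log-likelihood drops below the
            // cutoff, matching the logistic taper removed from CNaiveBayes.cc.
            return CTools::logisticFunction(
                (m_MaxLogLikelihood - m_Cutoff) / std::fabs(m_Cutoff), 0.1);
        }
        //! Clear per-feature state so one instance can be reused per feature.
        void reset() { m_MaxLogLikelihood = std::numeric_limits<double>::lowest(); }

    private:
        double m_Cutoff;
        double m_MaxLogLikelihood{std::numeric_limits<double>::lowest()};
    };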

//! \brief Implements a Naive Bayes classifier.
class MATHS_COMMON_EXPORT CNaiveBayes {
public:
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleSizePr = std::pair<double, std::size_t>;
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
using TDouble1Vec = core::CSmallVector<double, 1>;
using TDouble1VecVec = std::vector<TDouble1Vec>;
using TOptionalDouble = std::optional<double>;
using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;

private:
//! \brief All features have unit weight in class prediction.
class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
public:
void add(std::size_t, double) override {}
double calculate() const override { return 1.0; }
};

//! \brief Supplies unit feature weights.
class CUnitFeatureWeightProvider {
public:
CUnitFeatureWeight& operator()() const { return m_UnitWeight; }

private:
mutable CUnitFeatureWeight m_UnitWeight;
};

public:
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
double decayRate = 0.0,
TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser);
@@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
//! Check if any training data has been added, i.e. the classifier is initialized.
bool initialized() const;

//! Get the number of classes.
std::size_t numberClasses() const;

//! This can be used to optionally seed the class counts
//! with \p counts. These are added on to data class counts
//! to compute the class posterior probabilities.
@@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
//!
//! \param[in] n The number of class probabilities to estimate.
//! \param[in] x The feature values.
//! \param[in] weightProvider Supplies a feature weight computed from the
//! class conditional log-likelihood of the feature value. The weight
//! should be in the range [0,1]; the smaller the value, the less impact
//! the feature has on class selection.
//! \return The class probabilities and the minimum feature weight.
//! \note The size of \p x should equal the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
TDoubleSizePrVecDoublePr highestClassProbabilities(
std::size_t n,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;

//! Get the probability of the class labeled \p label for \p x.
//!
//! \param[in] label The label of the class of interest.
//! \param[in] x The feature values.
//! \param[in] weightProvider Supplies a feature weight computed from the
//! class conditional log-likelihood of the feature value. The weight
//! should be in the range [0,1]; the smaller the value, the less impact
//! the feature has on class selection.
//! \return The probability of the class labeled \p label and the minimum
//! feature weight.
//! \note The size of \p x should equal the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
double classProbability(std::size_t label, const TDouble1VecVec& x) const;
TDoubleDoublePr classProbability(std::size_t label,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider =
CUnitFeatureWeightProvider{}) const;

//! Get the probabilities of all the classes for \p x.
//!
//! \param[in] x The feature values.
//! \param[in] weightProvider Supplies a feature weight computed from the
//! class conditional log-likelihood of the feature value. The weight
//! should be in the range [0,1]; the smaller the value, the less impact
//! the feature has on class selection.
//! \return The class probabilities and the minimum feature weight.
//! \note The size of \p x should equal the number of features.
//! A missing feature is indicated by passing an empty vector
//! for that feature.
TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
TDoubleSizePrVecDoublePr
classProbabilities(const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
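
For illustration, a caller-side sketch (hypothetical, not part of this diff) of supplying a custom weight through TFeatureWeightProvider. It assumes the CMaxLogLikelihoodCutoffWeight example above, a trained CNaiveBayes instance classifier and feature values x; since the implementation requests the weight object once per feature, a stateful weight must be reset by the provider.

    // Hypothetical usage: -10.0 is an assumed cutoff, not a tested value.
    CMaxLogLikelihoodCutoffWeight weight{-10.0};
    auto provider = [&weight]() -> CNaiveBayesFeatureWeight& {
        weight.reset(); // clear accumulated state before each feature
        return weight;
    };
    auto [probabilities, minFeatureWeight] = classifier.classProbabilities(x, provider);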

//! Debug the memory used by this object.
void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
@@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
bool validate(const TDouble1VecVec& x) const;

private:
//! It is not always appropriate to use features with very low
//! probability in all classes to discriminate: the class choice
//! will be very sensitive to the underlying conditional density
//! model. This is a cutoff (for the minimum maximum class log
//! likelihood) in order to use a feature.
TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;

//! Controls the rate at which data are aged out.
double m_DecayRate;

3 changes: 1 addition & 2 deletions lib/core/CStateRestoreTraverser.cc
@@ -18,8 +18,7 @@ namespace core {
CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
}

CStateRestoreTraverser::~CStateRestoreTraverser() {
}
CStateRestoreTraverser::~CStateRestoreTraverser() = default;

bool CStateRestoreTraverser::haveBadState() const {
return m_BadState;
102 changes: 52 additions & 50 deletions lib/maths/common/CNaiveBayes.cc
@@ -40,8 +40,6 @@ namespace {
const core::TPersistenceTag PRIOR_TAG{"a", "prior"};
const core::TPersistenceTag CLASS_LABEL_TAG{"b", "class_label"};
const core::TPersistenceTag CLASS_MODEL_TAG{"c", "class_model"};
const core::TPersistenceTag MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{
"d", "min_max_likelihood_to_use_feature"};
const core::TPersistenceTag COUNT_TAG{"e", "count"};
const core::TPersistenceTag CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f", "conditional_density_from_prior"};
}
@@ -141,27 +139,26 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const {
return result;
}

CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
double decayRate,
TOptionalDouble minMaxLogLikelihoodToUseFeature)
: m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature},
m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate)
: m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
}

CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser)
: m_DecayRate{params.s_DecayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
if (traverser.traverseSubLevel([&](auto& traverser_) {
// If we persist before we create class conditional distributions we will
// not have anything to restore and hasSubLevel will be false. Trying to
// restore sets the traverser state to bad so we need to handle this case explicitly.
if (traverser.hasSubLevel() && traverser.traverseSubLevel([&](auto& traverser_) {
return this->acceptRestoreTraverser(params, traverser_);
}) == false) {
traverser.setBadState();
}
}

CNaiveBayes::CNaiveBayes(const CNaiveBayes& other)
: m_MinMaxLogLikelihoodToUseFeature{other.m_MinMaxLogLikelihoodToUseFeature},
m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
: m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
for (const auto& class_ : other.m_ClassConditionalDensities) {
m_ClassConditionalDensities.emplace(class_.first, class_.second);
}
@@ -178,9 +175,6 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& param
return class_.acceptRestoreTraverser(params, traverser_);
}),
m_ClassConditionalDensities.emplace(label, std::move(class_)))
RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value,
core::CStringUtils::stringToType(traverser.value(), value),
m_MinMaxLogLikelihoodToUseFeature.emplace(value))
} while (traverser.next());
return true;
}
@@ -203,12 +197,6 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c
class_->second.acceptPersistInserter(inserter_);
});
}

if (m_MinMaxLogLikelihoodToUseFeature) {
inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG,
*m_MinMaxLogLikelihoodToUseFeature,
core::CIEEE754::E_SinglePrecision);
}
}

CNaiveBayes& CNaiveBayes::operator=(const CNaiveBayes& other) {
@@ -223,26 +211,29 @@ void CNaiveBayes::swap(CNaiveBayes& other) {
std::swap(m_DecayRate, other.m_DecayRate);
m_Exemplar.swap(other.m_Exemplar);
m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities);
std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature);
}

bool CNaiveBayes::initialized() const {
return m_ClassConditionalDensities.size() > 0 &&
return m_ClassConditionalDensities.empty() == false &&
std::all_of(m_ClassConditionalDensities.begin(),
m_ClassConditionalDensities.end(),
[](const std::pair<std::size_t, CClass>& class_) {
return class_.second.initialized();
});
}

std::size_t CNaiveBayes::numberClasses() const {
return m_ClassConditionalDensities.size();
}

void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) {
for (const auto& count : counts) {
m_ClassConditionalDensities.emplace(count.second, CClass{count.first});
}
}

void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) {
if (!this->validate(x)) {
if (this->validate(x) == false) {
return;
}

Expand All @@ -257,7 +248,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec&

bool updateCount{false};
for (std::size_t i = 0; i < x.size(); ++i) {
if (x[i].size() > 0) {
if (x[i].empty() == false) {
class_.conditionalDensities()[i]->add(x[i]);
updateCount = true;
}
@@ -288,62 +279,74 @@ void CNaiveBayes::propagateForwardsByTime(double time) {
}
}

CNaiveBayes::TDoubleSizePrVec
CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const {
TDoubleSizePrVec p(this->classProbabilities(x));
CNaiveBayes::TDoubleSizePrVecDoublePr
CNaiveBayes::highestClassProbabilities(std::size_t n,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider) const {
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
n = std::min(n, p.size());
std::sort(p.begin(), p.begin() + n, std::greater<>());
return TDoubleSizePrVec{p.begin(), p.begin() + n};
return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight};
}

double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const {
TDoubleSizePrVec p(this->classProbabilities(x));
CNaiveBayes::TDoubleDoublePr
CNaiveBayes::classProbability(std::size_t label,
const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider) const {
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) {
return p_.second == label;
});
return i == p.end() ? 0.0 : i->first;
return {i == p.end() ? 0.0 : i->first, minFeatureWeight};
}

CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const {
if (!this->validate(x)) {
return {};
CNaiveBayes::TDoubleSizePrVecDoublePr
CNaiveBayes::classProbabilities(const TDouble1VecVec& x,
const TFeatureWeightProvider& weightProvider) const {
if (this->validate(x) == false) {
return {{}, 0.0};
}
if (m_ClassConditionalDensities.empty()) {
LOG_ERROR(<< "Trying to compute class probabilities without supplying training data");
return {};
return {{}, 0.0};
}

using TDoubleVec = std::vector<double>;
using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;

TDoubleSizePrVec p;
p.reserve(m_ClassConditionalDensities.size());
for (const auto& class_ : m_ClassConditionalDensities) {
p.emplace_back(CTools::fastLog(class_.second.count()), class_.first);
}
double minFeatureWeight{1.0};

TDoubleVec logLikelihoods;
for (std::size_t i = 0; i < x.size(); ++i) {
if (x[i].size() > 0) {
TMaxAccumulator maxLogLikelihood;
if (x[i].empty() == false) {
auto& featureWeight = weightProvider();
logLikelihoods.clear();
for (const auto& class_ : m_ClassConditionalDensities) {
const auto& density = class_.second.conditionalDensities()[i];
double logLikelihood{density->logValue(x[i])};
double logMaximumLikelihood{density->logMaximumValue()};
maxLogLikelihood.add(logLikelihood - logMaximumLikelihood);
logLikelihoods.push_back(logLikelihood);
featureWeight.add(class_.first, logLikelihood - logMaximumLikelihood);
}
double weight{1.0};
if (m_MinMaxLogLikelihoodToUseFeature) {
weight = CTools::logisticFunction(
(maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) /
std::fabs(*m_MinMaxLogLikelihoodToUseFeature),
0.1);
}

// We compute the class c_i probability using
//
// p(c_i | x) = exp(sum_j{w_j * log(L(x_j | c_i))}) * p(c_i) / Z.
//
// Any feature whose weight < 1 has its significance dropped in class
// selection: effectively we use the w_j'th root of the likelihood, which
// tends to 1 for all values as w_j becomes small. This can be used to
// ignore features for which x is in the extreme tails of the class
// conditional distribution.
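//
// For example (illustrative numbers only): if a feature receives weight
// w_j = 0.2, a class log-likelihood of -10 contributes just -2 to that
// class's score; as w_j -> 0 every class's contribution from the feature
// tends to 0, so the feature is effectively ignored.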
double featureWeight_{featureWeight.calculate()};
for (std::size_t j = 0; j < logLikelihoods.size(); ++j) {
p[j].first += weight * logLikelihoods[j];
p[j].first += featureWeight_ * logLikelihoods[j];
}
minFeatureWeight = std::min(minFeatureWeight, featureWeight_);
}
}

Expand All @@ -357,7 +360,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV
pc.first /= Z;
}

return p;
return {std::move(p), minFeatureWeight};
}

void CNaiveBayes::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
Expand All @@ -372,7 +375,6 @@ std::size_t CNaiveBayes::memoryUsage() const {
}

std::uint64_t CNaiveBayes::checksum(std::uint64_t seed) const {
CChecksum::calculate(seed, m_MinMaxLogLikelihoodToUseFeature);
CChecksum::calculate(seed, m_DecayRate);
CChecksum::calculate(seed, m_Exemplar);
return CChecksum::calculate(seed, m_ClassConditionalDensities);
@@ -394,7 +396,7 @@ std::string CNaiveBayes::print() const {
bool CNaiveBayes::validate(const TDouble1VecVec& x) const {
auto class_ = m_ClassConditionalDensities.begin();
if (class_ != m_ClassConditionalDensities.end() &&
class_->second.conditionalDensities().size() > 0 &&
class_->second.conditionalDensities().empty() == false &&
class_->second.conditionalDensities().size() != x.size()) {
LOG_ERROR(<< "Unexpected feature vector: " << x);
return false;
@@ -431,7 +433,7 @@ bool CNaiveBayes::CClass::acceptRestoreTraverser(const SDistributionRestoreParam
void CNaiveBayes::CClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision);
for (const auto& density : m_ConditionalDensities) {
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get())) {
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get()) != nullptr) {
inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG,
[&density](auto& inserter_) {
density->acceptPersistInserter(inserter_);