Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/controller/CCommandProcessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ bool CCommandProcessor::handleKill(std::uint32_t id, TStrVec tokens) {

if (m_Spawner.terminateChild(pid) == false) {
std::string error{"Failed to kill process with PID " + tokens[0]};
LOG_ERROR(<< error << " in command with ID " << id);
LOG_WARN(<< error << " in command with ID " << id);
m_ResponseWriter.writeResponse(id, false, error);
return false;
}
Expand Down
6 changes: 6 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
//=== Bug Fixes

//=== Regressions
== {es} version 9.3.0

=== Enhancements

* Downgrade log severity for a batch of recoverable errors. (See {ml-pull}2889[#2889].)

== {es} version 9.2.0

=== Enhancements
Expand Down
4 changes: 3 additions & 1 deletion lib/api/CAnomalyJob.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,9 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t
ss << "Records must be in ascending time order. "
<< "Record '" << ml::api::CAnomalyJob::debugPrintRecord(dataRowFields) << "' time "
<< *time << " is before bucket time " << m_LastFinalisedBucketEndTime;
LOG_ERROR(<< ss.str());
LOG_WARN(<< "Skipping out-of-order record: time " << *time << " is before current bucket "
<< m_LastFinalisedBucketEndTime << " (record: "
<< ml::api::CAnomalyJob::debugPrintRecord(dataRowFields) << ")");
return true;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/core/CDetachedProcessSpawner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class CTrackerThread : public CThread {

bool terminatePid(CProcess::TPid pid) {
if (!this->havePid(pid)) {
LOG_ERROR(<< "Will not attempt to kill process " << pid << ": not a child process");
LOG_WARN(<< "Will not attempt to kill process " << pid << ": not a child process");
return false;
}

Expand Down
6 changes: 3 additions & 3 deletions lib/maths/common/CLogNormalMeanPrecConjugate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1173,9 +1173,9 @@ CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TDouble1Vec& sampl
auto status = static_cast<maths_t::EFloatingPointErrorStatus>(
logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
if ((status & maths_t::E_FpFailed) != 0) {
LOG_ERROR(<< "Failed to compute log likelihood (" << this->debug() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
LOG_WARN(<< "Failed to compute log likelihood (" << this->debug() << ")");
LOG_DEBUG(<< "samples = " << samples);
LOG_DEBUG(<< "weights = " << weights);
} else if ((status & maths_t::E_FpOverflowed) != 0) {
LOG_TRACE(<< "Log likelihood overflowed for (" << this->debug() << ")");
LOG_TRACE(<< "samples = " << samples);
Expand Down
8 changes: 4 additions & 4 deletions lib/maths/common/CMultimodalPrior.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,7 @@ bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalcu
maths_t::EFloatingPointErrorStatus status =
this->jointLogMarginalLikelihood({x}, weight, fx);
if ((status & maths_t::E_FpFailed) != 0) {
LOG_ERROR(<< "Unable to compute likelihood for " << x);
LOG_WARN(<< "Unable to compute likelihood for " << x);
return false;
}
if ((status & maths_t::E_FpOverflowed) != 0) {
Expand Down Expand Up @@ -1233,8 +1233,8 @@ bool CMultimodalPrior::checkInvariants(const std::string& tag) const {
CEqualWithTolerance<double> equal{
CToleranceTypes::E_AbsoluteTolerance | CToleranceTypes::E_RelativeTolerance, 1e-3};
if (equal(modeSamples, numberSamples) == false) {
LOG_ERROR(<< tag << "Sum mode samples = " << modeSamples
<< ", total samples = " << numberSamples);
LOG_WARN(<< tag << "Sum mode samples = " << modeSamples
<< ", total samples = " << numberSamples);
result = false;
}

Expand Down Expand Up @@ -1451,7 +1451,7 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
}

if (m_Prior->checkInvariants("SPLIT: ") == false) {
LOG_ERROR(<< "# samples = " << numberSamples << ", # modes = " << modes.size()
LOG_DEBUG(<< "# samples = " << numberSamples << ", # modes = " << modes.size()
<< ", pLeft = " << pLeft << ", pRight = " << pRight);
}

Expand Down
11 changes: 6 additions & 5 deletions lib/maths/common/COneOfNPrior.cc
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,10 @@ void COneOfNPrior::addSamples(const TDouble1Vec& samples,
}

if (this->badWeights()) {
LOG_ERROR(<< "Update failed (" << this->debugWeights() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
LOG_WARN(<< "Prior update failed due to numerical instability (recovered by resetting to non-informative prior): "
<< this->debugWeights());
LOG_TRACE(<< "samples = " << samples);
LOG_TRACE(<< "weights = " << weights);
this->setToNonInformative(this->offsetMargin(), this->decayRate());
}
}
Expand Down Expand Up @@ -957,8 +958,8 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati

if (!(lowerBound >= 0.0 && lowerBound <= 1.001) ||
!(upperBound >= 0.0 && upperBound <= 1.001)) {
LOG_ERROR(<< "Bad probability bounds = [" << lowerBound << ", " << upperBound << "]"
<< ", " << logWeights);
LOG_WARN(<< "Bad probability bounds = [" << lowerBound << ", " << upperBound << "]"
<< ", " << logWeights);
}

if (CMathsFuncs::isNan(lowerBound)) {
Expand Down
10 changes: 5 additions & 5 deletions lib/maths/common/CTools.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1010,9 +1010,9 @@ operator()(const gamma& gamma_, double x, maths_t::ETail& tail) const {
} else if (std::fabs(fb - fx) < 10.0 * EPSILON * fx) {
y[i % 2] = b;
} else {
LOG_ERROR(<< "Failed in bracketed solver: " << e.what()
<< ", x = " << x << ", bracket = (" << a << ", " << b << ")"
<< ", f(bracket) = (" << fa - fx << "," << fb - fx << ")");
LOG_WARN(<< "Failed in bracketed solver: " << e.what()
<< ", x = " << x << ", bracket = (" << a << ", " << b << ")"
<< ", f(bracket) = (" << fa - fx << "," << fb - fx << ")");
return truncate(px, 0.0, 1.0);
}
}
Expand Down Expand Up @@ -1238,8 +1238,8 @@ operator()(const beta& beta_, double x, maths_t::ETail& tail) const {
} else if (std::fabs(fBracket.second - fx) < 10.0 * EPSILON * fx) {
y[i % 2] = bracket.second;
} else {
LOG_ERROR(<< "Failed in bracketed solver: " << e.what() << ", x = " << x
<< ", bracket " << bracket << ", f(bracket) = " << fBracket);
LOG_WARN(<< "Failed in bracketed solver: " << e.what() << ", x = " << x
<< ", bracket " << bracket << ", f(bracket) = " << fBracket);
return 1.0;
}
}
Expand Down
27 changes: 26 additions & 1 deletion lib/maths/common/CXMeansOnline1d.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@
#include <utility>
#include <vector>

namespace {
// Classify an exception as a numerical failure.
//
// Returns true when the dynamic type of e is (or derives from)
// std::domain_error, std::overflow_error or std::underflow_error.
// Otherwise the text returned by e.what() is searched for a fixed set of
// substrings and true is returned if any one of them is present. Returns
// false in every other case.
bool isNumericalError(const std::exception& e) {
    // First preference: recognise the failure by its exception type.
    if (dynamic_cast<const std::domain_error*>(&e) != nullptr) {
        return true;
    }
    if (dynamic_cast<const std::overflow_error*>(&e) != nullptr) {
        return true;
    }
    if (dynamic_cast<const std::underflow_error*>(&e) != nullptr) {
        return true;
    }

    // Fallback: recognise the failure by the content of its message.
    static const char* const MESSAGE_MARKERS[] = {
        "boost::math", "Scale parameter", "must be > 0", "-nan", "-inf"};

    const std::string description{e.what()};
    for (const char* marker : MESSAGE_MARKERS) {
        if (description.find(marker) != std::string::npos) {
            return true;
        }
    }
    return false;
}
} // anonymous namespace

namespace ml {
namespace maths {
namespace common {
Expand Down Expand Up @@ -465,7 +484,13 @@ void winsorise(const TDoubleDoublePr& interval, TTuple& category) {
CBasicStatistics::moment<0>(category) = wm;
CBasicStatistics::moment<1>(category) = std::max((n - 1.0) / n * wv, 0.0);
} catch (const std::exception& e) {
LOG_ERROR(<< "Bad category = " << category << ": " << e.what());
if (isNumericalError(e)) {
LOG_WARN(<< "Numerical error in category computation (recovered): " << category
<< " - " << e.what());
} else {
LOG_ERROR(<< "Unexpected error in category computation: " << category
<< " - " << e.what());
}
}
}

Expand Down
6 changes: 4 additions & 2 deletions lib/model/CBucketGatherer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,10 @@ bool CBucketGatherer::validateSampleTimes(core_t::TTime& startTime, core_t::TTim
}
for (/**/; startTime < endTime; startTime += this->bucketLength()) {
if (!this->dataAvailable(startTime)) {
LOG_ERROR(<< "No counts available at " << startTime
<< ", current bucket = " << this->printCurrentBucket());
LOG_WARN(<< "Skipping bucket due to missing counts at " << startTime << " (bucket window: ["
<< startTime << "," << startTime + this->bucketLength()
<< "], current bucket = " << this->printCurrentBucket()
<< ") - continuing to check other buckets");
continue;
}
return true;
Expand Down
2 changes: 1 addition & 1 deletion lib/model/CResourceMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ bool CResourceMonitor::isMemoryStable(core_t::TTime bucketLength) const {

// Sanity check
if (maths::common::CBasicStatistics::count(m_ModelBytesMoments) == 0.0) {
LOG_ERROR(<< "Programmatic error: checking memory stability before adding any measurements");
LOG_WARN(<< "Programmatic error: checking memory stability before adding any measurements");
return false;
}

Expand Down