diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 1d27595101..3470991fd9 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -198,10 +198,8 @@ int main(int argc, char** argv) { }()}; if (persistInterval >= 0 && persister == nullptr) { - LOG_FATAL(<< "Periodic persistence cannot be enabled using the " - "'persistInterval' argument " - "unless a place to persist to has been specified " - "using the 'persist' argument"); + LOG_FATAL(<< "Periodic persistence cannot be enabled using the 'persistInterval' argument " + "unless a place to persist to has been specified using the 'persist' argument"); return EXIT_FAILURE; } diff --git a/bin/categorize/Main.cc b/bin/categorize/Main.cc index 8bddee98e6..d39f0517cc 100644 --- a/bin/categorize/Main.cc +++ b/bin/categorize/Main.cc @@ -149,10 +149,8 @@ int main(int argc, char** argv) { }()}; if (persistInterval >= 0 && persister == nullptr) { - LOG_FATAL(<< "Periodic persistence cannot be enabled using the " - "'persistInterval' argument " - "unless a place to persist to has been specified " - "using the 'persist' argument"); + LOG_FATAL(<< "Periodic persistence cannot be enabled using the 'persistInterval' argument " + "unless a place to persist to has been specified using the 'persist' argument"); return EXIT_FAILURE; } using TBackgroundPersisterCUPtr = const std::unique_ptr; diff --git a/include/maths/CConstantPrior.h b/include/maths/CConstantPrior.h index 617814404c..2787a24178 100644 --- a/include/maths/CConstantPrior.h +++ b/include/maths/CConstantPrior.h @@ -74,17 +74,14 @@ class MATHS_EXPORT CConstantPrior : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyle, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Returns zero. virtual double offset() const; //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec& weightStyle, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! No-op. virtual void propagateForwardsByTime(double time); @@ -96,27 +93,22 @@ class MATHS_EXPORT CConstantPrior : public CPrior { virtual double marginalLikelihoodMean() const; //! Returns constant or zero if unset (by equidistribution). - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! All confidence intervals are the point [constant, constant]. - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Returns a large value if all samples are equal to the constant //! and zero otherwise. 
virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Get \p numberSamples times the constant. @@ -124,25 +116,22 @@ class MATHS_EXPORT CConstantPrior : public CPrior { //! A large number if any sample is less than the constant and //! zero otherwise. - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; //! A large number if any sample is larger than the constant and //! zero otherwise. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; //! Returns one if all samples equal the constant and one otherwise. virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CGammaRateConjugate.h b/include/maths/CGammaRateConjugate.h index cf760927de..60240d72f1 100644 --- a/include/maths/CGammaRateConjugate.h +++ b/include/maths/CGammaRateConjugate.h @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace ml { @@ -142,15 +143,11 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! This samples the current marginal likelihood and uses these samples //! to reconstruct the prior with adjusted offset. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -158,14 +155,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! Update the prior with a collection of independent samples from the //! gamma variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! 
@@ -184,14 +176,11 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -203,29 +192,23 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the gamma rate. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -240,9 +223,6 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. For the //! count variance scale weight style the weight is interpreted as a scale @@ -269,9 +249,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and //! a value of infinity is not well handled. (Very large values are //! handled though.) 
- virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -281,9 +260,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -292,9 +270,6 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see CTools::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound Filled in with the probability of the set @@ -308,9 +283,8 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CLogNormalMeanPrecConjugate.h b/include/maths/CLogNormalMeanPrecConjugate.h index 7e0a0513b0..e9e64c6a67 100644 --- a/include/maths/CLogNormalMeanPrecConjugate.h +++ b/include/maths/CLogNormalMeanPrecConjugate.h @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -145,15 +146,11 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! This samples the current marginal likelihood and uses these samples //! to reconstruct the prior with adjusted offset. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -161,14 +158,9 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! Update the prior with a collection of independent samples from //! the log-normal variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -187,14 +179,11 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -206,30 +195,24 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the exponentiated normal mean //! and precision. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -244,9 +227,6 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. For the //! count variance scale weight style the weight is interpreted as a scale @@ -280,9 +260,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! \f$(0,\infty)\f$, i.e. 
a value of zero is not well defined and //! a value of infinity is not well handled. The approximations we //! make are less good for \f$\gamma_i\f$ a long way from one. - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -292,9 +271,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -303,9 +281,6 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound Filled in with the probability of the set @@ -319,9 +294,8 @@ class MATHS_EXPORT CLogNormalMeanPrecConjugate : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CModel.h b/include/maths/CModel.h index 8564b821b2..5364a3fb69 100644 --- a/include/maths/CModel.h +++ b/include/maths/CModel.h @@ -95,8 +95,7 @@ class MATHS_EXPORT CModelParams { class MATHS_EXPORT CModelAddSamplesParams { public: using TDouble2Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; public: CModelAddSamplesParams(); @@ -118,20 +117,15 @@ class MATHS_EXPORT CModelAddSamplesParams { //! Get the model propagation interval. double propagationInterval() const; - //! Set the weight styles. - CModelAddSamplesParams& weightStyles(const maths_t::TWeightStyleVec& styles); - //! Get the weight styles. - const maths_t::TWeightStyleVec& weightStyles() const; - //! Set the trend samples weights. - CModelAddSamplesParams& trendWeights(const TDouble2Vec4VecVec& weights); + CModelAddSamplesParams& trendWeights(const TDouble2VecWeightsAryVec& weights); //! Get the trend sample weights. - const TDouble2Vec4VecVec& trendWeights() const; + const TDouble2VecWeightsAryVec& trendWeights() const; //! Set the prior samples weights. - CModelAddSamplesParams& priorWeights(const TDouble2Vec4VecVec& weights); + CModelAddSamplesParams& priorWeights(const TDouble2VecWeightsAryVec& weights); //! Get the prior sample weights. - const TDouble2Vec4VecVec& priorWeights() const; + const TDouble2VecWeightsAryVec& priorWeights() const; private: //! The data type. 
@@ -140,12 +134,10 @@ class MATHS_EXPORT CModelAddSamplesParams { bool m_IsNonNegative; //! The propagation interval. double m_PropagationInterval; - //! Controls the interpretation of the weights. - const maths_t::TWeightStyleVec* m_WeightStyles; //! The trend sample weights. - const TDouble2Vec4VecVec* m_TrendWeights; + const TDouble2VecWeightsAryVec* m_TrendWeights; //! The prior sample weights. - const TDouble2Vec4VecVec* m_PriorWeights; + const TDouble2VecWeightsAryVec* m_PriorWeights; }; //! \brief The extra parameters needed by CModel::probability. @@ -154,10 +146,9 @@ class MATHS_EXPORT CModelProbabilityParams { using TOptionalSize = boost::optional; using TBool2Vec = core::CSmallVector; using TBool2Vec1Vec = core::CSmallVector; - using TDouble2Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4Vec1Vec = core::CSmallVector; using TSize2Vec = core::CSmallVector; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAry1Vec = maths_t::TDouble2VecWeightsAry1Vec; using TProbabilityCalculation2Vec = core::CSmallVector; @@ -188,19 +179,14 @@ class MATHS_EXPORT CModelProbabilityParams { //! Get whether the values' bucket is empty. const TBool2Vec1Vec& bucketEmpty() const; - //! Set the weight styles. - CModelProbabilityParams& weightStyles(const maths_t::TWeightStyleVec& styles); - //! Get the weight styles. - const maths_t::TWeightStyleVec& weightStyles() const; - //! Add a value's weights. - CModelProbabilityParams& addWeights(const TDouble2Vec4Vec& weights); + CModelProbabilityParams& addWeights(const TDouble2VecWeightsAry& weights); //! Set the values' weights. - CModelProbabilityParams& weights(const TDouble2Vec4Vec1Vec& weights); + CModelProbabilityParams& weights(const TDouble2VecWeightsAry1Vec& weights); //! Get the values' weights. - const TDouble2Vec4Vec1Vec& weights() const; + const TDouble2VecWeightsAry1Vec& weights() const; //! Get writable values' weights. - TDouble2Vec4Vec1Vec& weights(); + TDouble2VecWeightsAry1Vec& weights(); //! Add a coordinate for which to compute probability. CModelProbabilityParams& addCoordinate(std::size_t coordinate); @@ -226,10 +212,8 @@ class MATHS_EXPORT CModelProbabilityParams { double m_SeasonalConfidenceInterval; //! True if the bucket is empty and false otherwise. TBool2Vec1Vec m_BucketEmpty; - //! Controls the interpretation of the weights. - const maths_t::TWeightStyleVec* m_WeightStyles; //! The sample weights. - TDouble2Vec4Vec1Vec m_Weights; + TDouble2VecWeightsAry1Vec m_Weights; //! The coordinates for which to compute the probability. TSize2Vec m_Coordinates; //! The most anomalous coordinate (if there is one). @@ -259,8 +243,6 @@ class MATHS_EXPORT CModel { using TDouble10Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; using TDouble2Vec3Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4Vec1Vec = core::CSmallVector; using TSize1Vec = core::CSmallVector; using TSize2Vec = core::CSmallVector; using TSize2Vec1Vec = core::CSmallVector; @@ -270,6 +252,8 @@ class MATHS_EXPORT CModel { using TSizeDoublePr1Vec = core::CSmallVector; using TTimeDouble2VecSizeTr = core::CTriple; using TTimeDouble2VecSizeTrVec = std::vector; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAry1Vec = maths_t::TDouble2VecWeightsAry1Vec; using TTail2Vec = core::CSmallVector; //! Possible statuses for updating a model. @@ -322,18 +306,15 @@ class MATHS_EXPORT CModel { //! 
Get the most likely value for the time series at \p time. virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const = 0; + const TDouble2VecWeightsAry& weights) const = 0; //! Get the most likely value for each correlate time series at //! \p time, if there are any. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const = 0; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const = 0; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const = 0; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const = 0; //! Remove any trend components from \p value. virtual void detrend(const TTime2Vec1Vec& time, @@ -349,8 +330,7 @@ class MATHS_EXPORT CModel { //! confidence interval for the time series at \p time. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const = 0; + const TDouble2VecWeightsAry& weights) const = 0; //! Forecast the time series and get its \p confidenceInterval //! percentage confidence interval between \p startTime and @@ -481,18 +461,14 @@ class MATHS_EXPORT CModelStub : public CModel { virtual void skipTime(core_t::TTime gap); //! Returns empty. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const; //! Returns empty. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const; //! Returns empty. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const; //! No-op. virtual void detrend(const TTime2Vec1Vec& time, @@ -507,8 +483,8 @@ class MATHS_EXPORT CModelStub : public CModel { //! Returns empty. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + const TDouble2VecWeightsAry& weights) const; + //! Returns empty. virtual bool forecast(core_t::TTime startTime, core_t::TTime endTime, diff --git a/include/maths/CMultimodalPrior.h b/include/maths/CMultimodalPrior.h index 4091e82e54..fd23243a5d 100644 --- a/include/maths/CMultimodalPrior.h +++ b/include/maths/CMultimodalPrior.h @@ -134,9 +134,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! Forward the offset to the mode priors. //! //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -144,14 +143,9 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! 
Update the prior with a collection of independent samples from //! the variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -172,19 +166,15 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the local maxima of the marginal likelihood function. virtual TDouble1Vec - marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodModes(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -196,30 +186,24 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the mode parameters and summing //! over modes. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! 
Sample the marginal likelihood function. @@ -234,9 +218,6 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ @@ -247,9 +228,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and //! a value of infinity is not well handled. (Very large values are //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -259,9 +239,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -270,9 +249,6 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with the probability of the set @@ -286,9 +262,8 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity is //! not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; @@ -330,9 +305,6 @@ class MATHS_EXPORT CMultimodalPrior : public CPrior { bool checkInvariants(const std::string& tag = std::string()) const; private: - using TDouble1VecVec = std::vector; - using TDouble4Vec1VecVec = std::vector; - //! The callback invoked when a mode is split. 
class MATHS_EXPORT CModeSplitCallback { public: diff --git a/include/maths/CMultimodalPriorUtils.h b/include/maths/CMultimodalPriorUtils.h index 87f37e4bcc..489c57c41a 100644 --- a/include/maths/CMultimodalPriorUtils.h +++ b/include/maths/CMultimodalPriorUtils.h @@ -51,29 +51,29 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { using TDoubleDoublePr = std::pair; using TDoubleVec = std::vector; using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; + using TDoubleWeightsAry = maths_t::TDoubleWeightsAry; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; - using TWeights = CConstantWeights; + using TWeights = maths_t::CUnitWeights; //! Get the mode of the marginal likelihood function. template static TDoubleDoublePr marginalLikelihoodSupport(const std::vector>& modes) { if (modes.size() == 0) { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } if (modes.size() == 1) { return modes[0].s_Prior->marginalLikelihoodSupport(); } - TDoubleDoublePr result(boost::numeric::bounds::highest(), - boost::numeric::bounds::lowest()); + TDoubleDoublePr result{boost::numeric::bounds::highest(), + boost::numeric::bounds::lowest()}; // We define this is as the union of the mode supports. - for (std::size_t i = 0u; i < modes.size(); ++i) { - TDoubleDoublePr s = modes[i].s_Prior->marginalLikelihoodSupport(); + for (const auto& mode : modes) { + TDoubleDoublePr s = mode.s_Prior->marginalLikelihoodSupport(); result.first = std::min(result.first, s.first); result.second = std::max(result.second, s.second); } @@ -108,43 +108,34 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! Get the mode of the marginal likelihood function. template static double marginalLikelihoodMode(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + const TDoubleWeightsAry& weights) { if (modes.size() == 0) { return 0.0; } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodMode(weights); } - using TMaxAccumulator = - CBasicStatistics::COrderStatisticsStack>; + using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; // We'll approximate this as the maximum likelihood mode (mode). double result = 0.0; - double seasonalScale = 1.0; - double countVarianceScale = 1.0; - try { - seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); - countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale " << e.what()); - } + double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weights)); + double countVarianceScale = maths_t::countVarianceScale(weights); // Declared outside the loop to minimize number of times they // are created. 
TDouble1Vec mode(1); - TDouble4Vec1Vec weight(1, TDouble4Vec(1, countVarianceScale)); + TDoubleWeightsAry1Vec weight{maths_t::countVarianceScaleWeight(countVarianceScale)}; TMaxAccumulator maxLikelihood; for (std::size_t i = 0u; i < modes.size(); ++i) { double w = modes[i].weight(); const T& prior = modes[i].s_Prior; - mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight[0]); + mode[0] = prior->marginalLikelihoodMode(weight[0]); double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, - mode, weight, likelihood) & + if (prior->jointLogMarginalLikelihood(mode, weight, likelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed)) { continue; } @@ -153,7 +144,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } } - if (maths_t::hasSeasonalVarianceScale(weightStyles, weights)) { + if (maths_t::hasSeasonalVarianceScale(weights)) { double mean = marginalLikelihoodMean(modes); result = mean + seasonalScale * (result - mean); } @@ -165,13 +156,12 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { template static double marginalLikelihoodVariance(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + const TDoubleWeightsAry& weights) { if (modes.size() == 0) { return boost::numeric::bounds::highest(); } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodVariance(weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodVariance(weights); } // By linearity we have that: @@ -179,14 +169,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { // = Sum_i{ w(i) * (Integral{ x^2 * f(x | i) } - m^2) } // = Sum_i{ w(i) * ((mi^2 + vi) - m^2) } - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale " << e.what()); - } - + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double mean = marginalLikelihoodMean(modes); TMeanAccumulator result; @@ -214,8 +198,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { marginalLikelihoodConfidenceInterval(const PRIOR& prior, const std::vector& modes, double percentage, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) { + const TDoubleWeightsAry& weights) { TDoubleDoublePr support = marginalLikelihoodSupport(modes); if (isNonInformative(modes)) { @@ -223,8 +206,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } if (modes.size() == 1) { - return modes[0].s_Prior->marginalLikelihoodConfidenceInterval( - percentage, weightStyles, weights); + return modes[0].s_Prior->marginalLikelihoodConfidenceInterval(percentage, weights); } percentage /= 100.0; @@ -236,8 +218,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { double p1 = std::log((1.0 - percentage) / 2.0); double p2 = std::log((1.0 + percentage) / 2.0); - CLogCdf fl(CLogCdf::E_Lower, prior, weightStyles, weights); - CLogCdf fu(CLogCdf::E_Upper, prior, weightStyles, weights); + CLogCdf fl(CLogCdf::E_Lower, prior, weights); + CLogCdf fu(CLogCdf::E_Upper, prior, weights); CCompositeFunctions::CMinusConstant&> f1(fl, p1); CCompositeFunctions::CMinusConstant&> f2(fu, p2); @@ -247,7 +229,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private 
core::CNonInstantiatable { TDoubleDoublePr result; - double x0 = marginalLikelihoodMode(modes, weightStyles, weights); + double x0 = marginalLikelihoodMode(modes, weights); try { double f10 = f1(x0); @@ -318,9 +300,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { template static maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& result) { // The likelihood can be computed from the conditional likelihood // that a sample is from each mode. In particular, the likelihood @@ -359,20 +340,21 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { TSizeDoublePr5Vec modeLogLikelihoods; modeLogLikelihoods.reserve(modes.size()); - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) + double mean = maths_t::hasSeasonalVarianceScale(weights) ? marginalLikelihoodMean(modes) : 0.0; - TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); + TDoubleWeightsAry1Vec weight{TWeights::UNIT}; try { for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); + double n = maths_t::countForUpdate(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); double logSeasonalScale = seasonalScale != 1.0 ? std::log(seasonalScale) : 0.0; sample[0] = mean + (samples[i] - mean) / seasonalScale; - weight[0][0] = maths_t::countVarianceScale(weightStyles, weights[i]); + maths_t::setCountVarianceScale( + maths_t::countVarianceScale(weights[i]), weight[0]); // We re-normalize so that the maximum log likelihood is one // to avoid underflow. @@ -383,13 +365,13 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { double modeLogLikelihood; maths_t::EFloatingPointErrorStatus status = modes[j].s_Prior->jointLogMarginalLikelihood( - TWeights::COUNT_VARIANCE, sample, weight, modeLogLikelihood); + sample, weight, modeLogLikelihood); if (status & maths_t::E_FpFailed) { // Logging handled at a lower level. return status; } if (!(status & maths_t::E_FpOverflowed)) { - modeLogLikelihoods.push_back(std::make_pair(j, modeLogLikelihood)); + modeLogLikelihoods.emplace_back(j, modeLogLikelihood); maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood); } } @@ -498,13 +480,12 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! variable. template static bool minusLogJointCdf(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) { - return minusLogJointCdf(modes, CMinusLogJointCdf(), weightStyles, - samples, weights, lowerBound, upperBound); + return minusLogJointCdf(modes, CMinusLogJointCdf(), samples, weights, + lowerBound, upperBound); } //! Compute minus the log of the one minus the joint c.d.f. of the @@ -512,15 +493,13 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { //! to cancellation errors at one, i.e. the smallest non-zero value //! this can return is the minimum double rather than epsilon. 
template - static bool - minusLogJointCdfComplement(const std::vector>& modes, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, - double& lowerBound, - double& upperBound) { - return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), weightStyles, - samples, weights, lowerBound, upperBound); + static bool minusLogJointCdfComplement(const std::vector>& modes, + const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, + double& lowerBound, + double& upperBound) { + return minusLogJointCdf(modes, CMinusLogJointCdfComplement(), samples, + weights, lowerBound, upperBound); } //! Calculate the joint probability of seeing a lower likelihood @@ -530,9 +509,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { static bool probabilityOfLessLikelySamples(const PRIOR& prior, const std::vector& modes, maths_t::EProbabilityCalculation calculation, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) { @@ -543,14 +521,13 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { LOG_ERROR(<< "Can't compute distribution for empty sample set"); return false; } - if (isNonInformative(modes)) { return true; } if (modes.size() == 1) { return modes[0].s_Prior->probabilityOfLessLikelySamples( - calculation, weightStyles, samples, weights, lowerBound, upperBound, tail); + calculation, samples, weights, lowerBound, upperBound, tail); } // Ideally we'd find the probability of the set of samples whose @@ -595,7 +572,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { switch (calculation) { case maths_t::E_OneSidedBelow: - if (!minusLogJointCdf(modes, weightStyles, samples, weights, upperBound, lowerBound)) { + if (!minusLogJointCdf(modes, samples, weights, upperBound, lowerBound)) { LOG_ERROR(<< "Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); return false; @@ -616,7 +593,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first; support.second = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.second; - double mean = marginalLikelihoodMean(modes); + bool hasSeasonalScale = maths_t::hasSeasonalVarianceScale(weights); + double mean = hasSeasonalScale ? marginalLikelihoodMean(modes) : 0.0; double a = boost::numeric::bounds::highest(); double b = boost::numeric::bounds::lowest(); @@ -631,29 +609,23 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { b = CTools::truncate(b, support.first, support.second); LOG_TRACE(<< "a = " << a << ", b = " << b << ", Z = " << Z); - std::size_t svi = static_cast( - std::find(weightStyles.begin(), weightStyles.end(), - maths_t::E_SampleSeasonalVarianceScaleWeight) - - weightStyles.begin()); - // Declared outside the loop to minimize the number of times - // they are created. - TDouble4Vec1Vec weight(1); - TDouble1Vec wt(1); + // it is created. 
+ TDoubleWeightsAry1Vec weight(1); int tail_ = 0; for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; weight[0] = weights[i]; - - if (svi < weight.size()) { - x = mean + (x - mean) / std::sqrt(weights[i][svi]); - weight[0][svi] = 1.0; + if (hasSeasonalScale) { + x = mean + (x - mean) / + std::sqrt(maths_t::seasonalVarianceScale(weight[0])); + maths_t::setSeasonalVarianceScale(1.0, weight[0]); } double fx; maths_t::EFloatingPointErrorStatus status = - jointLogMarginalLikelihood(modes, weightStyles, {x}, weight, fx); + jointLogMarginalLikelihood(modes, {x}, weight, fx); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for " << x); return false; @@ -664,16 +636,15 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } LOG_TRACE(<< "x = " << x << ", f(x) = " << fx); - CPrior::CLogMarginalLikelihood logLikelihood(prior, weightStyles, weight); + CPrior::CLogMarginalLikelihood logLikelihood(prior, weight); CTools::CMixtureProbabilityOfLessLikelySample calculator( modes.size(), x, fx, a, b); for (const auto& mode : modes) { double w = mode.weight() / Z; - double centre = mode.s_Prior->marginalLikelihoodMode( - weightStyles, weight[0]); - double spread = std::sqrt(mode.s_Prior->marginalLikelihoodVariance( - weightStyles, weight[0])); + double centre = mode.s_Prior->marginalLikelihoodMode(weight[0]); + double spread = std::sqrt( + mode.s_Prior->marginalLikelihoodVariance(weight[0])); calculator.addMode(w, centre, spread); tail_ = tail_ | (x < centre ? maths_t::E_LeftTail : maths_t::E_RightTail); } @@ -683,31 +654,27 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { double lb, ub; - double l; + double xl; CEqualWithTolerance lequal( CToleranceTypes::E_AbsoluteTolerance, EPS * a); - if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, l)) { - wt[0] = l; - minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); + if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, xl)) { + minusLogJointCdf(modes, {xl}, weight, lb, ub); sampleLowerBound += std::exp(std::min(-lb, -ub)); sampleUpperBound += std::exp(std::max(-lb, -ub)); } else { - wt[0] = l; - minusLogJointCdf(modes, weightStyles, wt, weight, lb, ub); + minusLogJointCdf(modes, {xl}, weight, lb, ub); sampleUpperBound += std::exp(std::max(-lb, -ub)); } - double r; + double xr; CEqualWithTolerance requal( CToleranceTypes::E_AbsoluteTolerance, EPS * b); - if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, r)) { - wt[0] = r; - minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); + if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, xr)) { + minusLogJointCdfComplement(modes, {xr}, weight, lb, ub); sampleLowerBound += std::exp(std::min(-lb, -ub)); sampleUpperBound += std::exp(std::max(-lb, -ub)); } else { - wt[0] = r; - minusLogJointCdfComplement(modes, weightStyles, wt, weight, lb, ub); + minusLogJointCdfComplement(modes, {xr}, weight, lb, ub); sampleUpperBound += std::exp(std::max(-lb, -ub)); } @@ -733,8 +700,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } break; case maths_t::E_OneSidedAbove: - if (!minusLogJointCdfComplement(modes, weightStyles, samples, - weights, upperBound, lowerBound)) { + if (!minusLogJointCdfComplement(modes, samples, weights, upperBound, lowerBound)) { LOG_ERROR(<< "Failed computing probability of less likely samples: " << core::CContainerPrinter::print(samples)); return false; @@ -785,13 +751,11 @@ class MATHS_EXPORT 
CMultimodalPriorUtils : private core::CNonInstantiatable { public: template bool operator()(const T& prior, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return prior->minusLogJointCdf(weightStyles, samples, weights, - lowerBound, upperBound); + return prior->minusLogJointCdf(samples, weights, lowerBound, upperBound); } }; @@ -800,13 +764,11 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { public: template bool operator()(const T& prior, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return prior->minusLogJointCdfComplement(weightStyles, samples, weights, - lowerBound, upperBound); + return prior->minusLogJointCdfComplement(samples, weights, lowerBound, upperBound); } }; @@ -820,18 +782,13 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { enum EStyle { E_Lower, E_Upper, E_Mean }; public: - CLogCdf(EStyle style, - const PRIOR& prior, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) - : m_Style(style), m_Prior(&prior), m_WeightStyles(&weightStyles), - m_Weights(1, weights), m_X(1u, 0.0) {} + CLogCdf(EStyle style, const PRIOR& prior, const TDoubleWeightsAry& weights) + : m_Style(style), m_Prior(&prior), m_Weights(1, weights), m_X(1u, 0.0) {} double operator()(double x) const { m_X[0] = x; double lowerBound, upperBound; - if (!m_Prior->minusLogJointCdf(*m_WeightStyles, m_X, m_Weights, - lowerBound, upperBound)) { + if (!m_Prior->minusLogJointCdf(m_X, m_Weights, lowerBound, upperBound)) { throw std::runtime_error("Unable to compute c.d.f. at " + core::CStringUtils::typeToString(x)); } @@ -849,8 +806,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { private: EStyle m_Style; const PRIOR* m_Prior; - const maths_t::TWeightStyleVec* m_WeightStyles; - TDouble4Vec1Vec m_Weights; + TDoubleWeightsAry1Vec m_Weights; //! Avoids creating the vector argument to minusLogJointCdf //! more than once. mutable TDouble1Vec m_X; @@ -862,9 +818,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { template static bool minusLogJointCdf(const std::vector>& modes, CDF minusLogCdf, - const maths_t::TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) { lowerBound = upperBound = 0.0; @@ -875,8 +830,7 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { } if (modes.size() == 1) { - return minusLogCdf(modes[0].s_Prior, weightStyles, samples, weights, - lowerBound, upperBound); + return minusLogCdf(modes[0].s_Prior, samples, weights, lowerBound, upperBound); } using TMinAccumulator = CBasicStatistics::COrderStatisticsStack; @@ -890,23 +844,21 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { // Declared outside the loop to minimize the number of times // they are created. 
TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1, TDouble4Vec(1, 1.0)); - TDouble4Vec modeLowerBounds; - TDouble4Vec modeUpperBounds; + TDoubleWeightsAry1Vec weight{TWeights::UNIT}; + TDoubleVec modeLowerBounds; + TDoubleVec modeUpperBounds; modeLowerBounds.reserve(modes.size()); modeUpperBounds.reserve(modes.size()); try { - double mean = maths_t::hasSeasonalVarianceScale(weightStyles, weights) + double mean = maths_t::hasSeasonalVarianceScale(weights) ? marginalLikelihoodMean(modes) : 0.0; for (std::size_t i = 0; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); if (isNonInformative(modes)) { lowerBound -= n * std::log(CTools::IMPROPER_CDF); @@ -914,9 +866,9 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { continue; } - sample[0] = seasonalScale != 1.0 ? mean + (samples[i] - mean) / seasonalScale - : samples[i]; - weight[0][0] = countVarianceScale; + sample[0] = mean + (samples[i] - mean) / seasonalScale; + maths_t::setCountVarianceScale( + maths_t::countVarianceScale(weights[i]), weight[0]); // We re-normalize so that the maximum log c.d.f. is one // to avoid underflow. @@ -928,8 +880,8 @@ class MATHS_EXPORT CMultimodalPriorUtils : private core::CNonInstantiatable { for (std::size_t j = 0u; j < modes.size(); ++j) { double modeLowerBound; double modeUpperBound; - if (!minusLogCdf(modes[j].s_Prior, TWeights::COUNT_VARIANCE, sample, - weight, modeLowerBound, modeUpperBound)) { + if (!minusLogCdf(modes[j].s_Prior, sample, weight, + modeLowerBound, modeUpperBound)) { LOG_ERROR(<< "Unable to compute c.d.f. for " << core::CContainerPrinter::print(samples)); return false; diff --git a/include/maths/CMultinomialConjugate.h b/include/maths/CMultinomialConjugate.h index 6137c6a2ec..008058b78d 100644 --- a/include/maths/CMultinomialConjugate.h +++ b/include/maths/CMultinomialConjugate.h @@ -107,9 +107,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Returns zero. virtual double offset() const; @@ -117,14 +116,9 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! Update the prior with a collection of independent samples from the //! multinomial variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -143,14 +137,11 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. 
- virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -163,20 +154,15 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! distribution is discrete we can only approximate the probability. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Ignored. //! \param[in] weights Ignored. //! \note \p percentage should be in the range [0.0, 100.0). - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the category probability parameters. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the log likelihood of \p samples. @@ -186,9 +172,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -205,9 +190,6 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! Compute minus the log of the joint cumulative density function //! of the marginal likelihood at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. Note variance scales are ignored. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound If the model has not overflowed this is @@ -219,9 +201,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! the c.d.f. and \f$\{x_i\}\f$ are the samples. Otherwise, it is //! filled in with a sharp upper bound. //! \note The samples are assumed to be independent. 
- virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -232,9 +213,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -243,9 +223,6 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. Note variance scales are ignored. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound If the model has not overflowed this is filled @@ -260,9 +237,8 @@ class MATHS_EXPORT CMultinomialConjugate : public CPrior { //! are in or neither. //! \note The samples are assumed to be independent. virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CMultivariateConstantPrior.h b/include/maths/CMultivariateConstantPrior.h index 95273fe32c..200268e5a1 100644 --- a/include/maths/CMultivariateConstantPrior.h +++ b/include/maths/CMultivariateConstantPrior.h @@ -73,14 +73,12 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); //! No-op. - virtual void adjustOffset(const TWeightStyleVec& weightStyle, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! Set the constant if it hasn't been set. - virtual void addSamples(const TWeightStyleVec& weightStyle, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! No-op. virtual void propagateForwardsByTime(double time); @@ -100,8 +98,7 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { virtual TDouble10Vec marginalLikelihoodMean() const; //! Returns constant or zero if unset (by equidistribution). - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const; //! Get the covariance matrix of the marginal likelihood. virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const; @@ -112,9 +109,8 @@ class MATHS_EXPORT CMultivariateConstantPrior : public CMultivariatePrior { //! Returns a large value if all samples are equal to the constant //! and zero otherwise. 
virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const; //! Get \p numberSamples times the constant. diff --git a/include/maths/CMultivariateMultimodalPrior.h b/include/maths/CMultivariateMultimodalPrior.h index a36cf02a54..6d12941b11 100644 --- a/include/maths/CMultivariateMultimodalPrior.h +++ b/include/maths/CMultivariateMultimodalPrior.h @@ -61,7 +61,7 @@ using TSizeDoublePr = std::pair; using TSizeDoublePr3Vec = core::CSmallVector; using TPriorPtr = std::shared_ptr; using TDouble10Vec1Vec = CMultivariatePrior::TDouble10Vec1Vec; -using TDouble10Vec4Vec1Vec = CMultivariatePrior::TDouble10Vec4Vec1Vec; +using TDouble10VecWeightsAry1Vec = CMultivariatePrior::TDouble10VecWeightsAry1Vec; using TMode = SMultimodalPriorMode>; using TModeVec = std::vector; @@ -69,9 +69,8 @@ using TModeVec = std::vector; MATHS_EXPORT maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& sample, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, TSizeDoublePr3Vec& modeLogLikelihoods, double& result); @@ -146,7 +145,6 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { using TClusterer = CClusterer; using TClustererPtr = std::shared_ptr; using TPriorPtrVec = std::vector; - using TWeights = CConstantWeights; // Lift all overloads of into scope. //{ @@ -283,34 +281,26 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! For priors with non-negative support this adjusts the offset used //! to extend the support to handle negative samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. - virtual void adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { // This has to adjust offsets for its modes because it must be // possible to call jointLogMarginalLikelihood before the samples // have been added to the prior in order for model selection to // work. for (const auto& mode : m_Modes) { - mode.s_Prior->adjustOffset(weightStyles, samples, weights); + mode.s_Prior->adjustOffset(samples, weights); } } //! Update the prior with a collection of independent samples from the //! process. //! - //! \param[in] weightStyles_ Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void addSamples(const TWeightStyleVec& weightStyles_, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { if (samples.empty()) { return; } @@ -324,27 +314,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { // Declared outside the loop to minimize the number of times it // is initialized. - TWeightStyleVec weightStyles(weightStyles_); TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec weight(1); + TDouble10VecWeightsAry1Vec weight{TWeights::unit(N)}; TSizeDoublePr2Vec clusters; - std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES]; - std::size_t missing = weightStyles.size() + 1; - std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - indices[weightStyles[i]] = i; - } - std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight]; - std::size_t count = indices[maths_t::E_SampleCountWeight]; - std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight]; - if (count == missing) { - count = weightStyles.size(); - weightStyles.push_back(maths_t::E_SampleCountWeight); - } - try { - bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; + bool hasSeasonalScale = !this->isNonInformative() && + maths_t::hasSeasonalVarianceScale(weights); TPoint mean = hasSeasonalScale ? this->mean() : TPoint(0.0); @@ -356,19 +332,16 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } if (hasSeasonalScale) { - TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale( - N, weightStyles_, weights[i]))); + TPoint seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(weights[i]))); x = mean + (x - mean) / seasonalScale; } sample[0] = x.template toVector(); weight[0] = weights[i]; - weight[0].resize(weightStyles.size(), TDouble10Vec(N, 1.0)); - if (seasonal != missing) { - weight[0][seasonal].assign(N, 1.0); - } + maths_t::setSeasonalVarianceScale(1.0, N, weight[0]); - double smallestCountWeight = this->smallest(weight[0][count]); + double smallestCountWeight = this->smallest(maths_t::count(weight[0])); clusters.clear(); m_Clusterer->add(x, clusters, smallestCountWeight); @@ -386,16 +359,17 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { m_Modes.emplace_back(cluster.first, m_SeedPrior); k = m_Modes.end() - 1; } - weight[0][count].assign(N, cluster.second); - if (winsorisation != missing) { - TDouble10Vec& ww = weight[0][winsorisation]; + maths_t::setCount(cluster.second, N, weight[0]); + if (maths_t::isWinsorised(weight)) { + TDouble10Vec ww = maths_t::winsorisationWeight(weight[0]); double f = (k->weight() + cluster.second) / Z; for (auto& w : ww) { w = std::max(1.0 - (1.0 - w) / f, w * f); } + maths_t::setWinsorisationWeight(ww, weight[0]); } - k->s_Prior->addSamples(weightStyles, sample, weight); - n += this->smallest(maths_t::countForUpdate(N, weightStyles, weight[0])); + k->s_Prior->addSamples(sample, weight); + n += this->smallest(maths_t::countForUpdate(weight[0])); } this->addSamples(n); } @@ -410,7 +384,6 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. return; @@ -449,6 +422,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! is univariate. 
virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + std::size_t n = m_Modes.size(); CMultimodalPrior::TPriorPtrVec modes; @@ -498,6 +472,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! is univariate. virtual TPriorPtrDoublePr bivariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + if (N == 2) { return TPriorPtrDoublePr(TPriorPtr(this->clone()), 0.0); } @@ -538,6 +513,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! Get the support for the marginal likelihood function. virtual TDouble10VecDouble10VecPr marginalLikelihoodSupport() const { + if (m_Modes.size() == 0) { return {TPoint::smallest().template toVector(), TPoint::largest().template toVector()}; @@ -568,13 +544,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (m_Modes.size() == 1) { return m_Modes[0].s_Prior->marginalLikelihoodMean(); } - return this->mean().template toVector(); } //! Get the nearest mean of the multimodal prior marginal likelihood, //! otherwise the marginal likelihood mean. virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value_) const { + if (m_Modes.empty()) { return TDouble10Vec(N, 0.0); } @@ -599,13 +575,13 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weight) const { + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& weight) const { + if (m_Modes.size() == 0) { return TDouble10Vec(N, 0.0); } if (m_Modes.size() == 1) { - return m_Modes[0].s_Prior->marginalLikelihoodMode(weightStyles, weight); + return m_Modes[0].s_Prior->marginalLikelihoodMode(weight); } using TMaxAccumulator = @@ -614,15 +590,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { // We'll approximate this as the mode with the maximum likelihood. TPoint result(0.0); - TPoint seasonalScale(1.0); - TDouble10Vec4Vec1Vec weight_(1, TDouble10Vec4Vec(1)); - try { - seasonalScale = - sqrt(TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weight))); - weight_[0][0] = maths_t::countVarianceScale(N, weightStyles, weight); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale " << e.what()); - } + TPoint seasonalScale = sqrt(TPoint(maths_t::seasonalVarianceScale(weight))); + TDouble10VecWeightsAry1Vec weight_{TWeights::unit(N)}; + maths_t::setCountVarianceScale(maths_t::countVarianceScale(weight), weight_[0]); // Declared outside the loop to minimize number of times it is created. TDouble10Vec1Vec mode(1); @@ -631,10 +601,9 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { for (const auto& mode_ : m_Modes) { double w = mode_.weight(); const TPriorPtr& prior = mode_.s_Prior; - mode[0] = prior->marginalLikelihoodMode(TWeights::COUNT_VARIANCE, weight_[0]); + mode[0] = prior->marginalLikelihoodMode(weight_[0]); double likelihood; - if (prior->jointLogMarginalLikelihood(TWeights::COUNT_VARIANCE, - mode, weight_, likelihood) & + if (prior->jointLogMarginalLikelihood(mode, weight_, likelihood) & maths_t::E_FpAllErrors) { continue; } @@ -649,12 +618,11 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } //! Get the local maxima of the marginal likelihood functions. 
- TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const { + TDouble10Vec1Vec marginalLikelihoodModes(const TDouble10VecWeightsAry& weights) const { TDouble10Vec1Vec result; result.reserve(m_Modes.size()); for (const auto& mode : m_Modes) { - result.push_back(mode.s_Prior->marginalLikelihoodMode(weightStyles, weights)); + result.push_back(mode.s_Prior->marginalLikelihoodMode(weights)); } return result; } @@ -684,17 +652,14 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! Calculate the log marginal likelihood function, integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { @@ -704,7 +669,6 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (!this->check(samples, weights)) { return maths_t::E_FpFailed; } - if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because @@ -721,8 +685,8 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { if (m_Modes.size() == 1) { // Apply a small penalty to kill off this model if the data are // single mode. - maths_t::EFloatingPointErrorStatus status = m_Modes[0].s_Prior->jointLogMarginalLikelihood( - weightStyles, samples, weights, result); + maths_t::EFloatingPointErrorStatus status = + m_Modes[0].s_Prior->jointLogMarginalLikelihood(samples, weights, result); result -= 10.0 * this->decayRate(); return status; } @@ -736,32 +700,29 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { detail::TSizeDoublePr3Vec modeLogLikelihoods; modeLogLikelihoods.reserve(m_Modes.size()); - bool hasSeasonalScale = maths_t::hasSeasonalVarianceScale(weightStyles, weights); + TPoint mean = maths_t::hasSeasonalVarianceScale(weights) ? this->mean() + : TPoint(0.0); - TPoint mean = hasSeasonalScale ? 
this->mean() : TPoint(0.0); - TDouble10Vec4Vec1Vec weights_(1, TDouble10Vec4Vec(1, TDouble10Vec(N, 1.0))); + TDouble10VecWeightsAry1Vec weight{TWeights::unit(N)}; try { for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = this->smallest( - maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint seasonalScale = sqrt(TPoint( - maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); + double n = this->smallest(maths_t::countForUpdate(weights[i])); + TPoint seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(weights[i]))); double logSeasonalScale = 0.0; for (std::size_t j = 0u; j < seasonalScale.dimension(); ++j) { logSeasonalScale += std::log(seasonalScale(j)); } TPoint x(samples[i]); - if (hasSeasonalScale) { - x = mean + (x - mean) / seasonalScale; - } + x = mean + (x - mean) / seasonalScale; sample[0] = x.template toVector(); - weights_[0][0] = maths_t::countVarianceScale(N, weightStyles, weights[i]); + maths_t::setCountVarianceScale( + maths_t::countVarianceScale(weights[i]), weight[0]); double sampleLogLikelihood; maths_t::EFloatingPointErrorStatus status = detail::jointLogMarginalLikelihood( - m_Modes, TWeights::COUNT_VARIANCE, sample, weights_, - modeLogLikelihoods, sampleLogLikelihood); + m_Modes, sample, weight, modeLogLikelihoods, sampleLogLikelihood); if (status & maths_t::E_FpOverflowed) { result = boost::numeric::bounds::lowest(); return status; @@ -930,6 +891,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { void operator()(std::size_t sourceIndex, std::size_t leftSplitIndex, std::size_t rightSplitIndex) const { + LOG_TRACE(<< "Splitting mode with index " << sourceIndex); TModeVec& modes = m_Prior->m_Modes; @@ -961,24 +923,22 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - double nl = pLeft * numberSamples; - double ns = std::min(nl, static_cast(N + 2)); - double s = static_cast(samples.size()); - LOG_TRACE(<< "# left = " << nl); + double wl = pLeft * numberSamples; + double ws = std::min(wl, static_cast(N + 2)); + double n = static_cast(samples.size()); + LOG_TRACE(<< "# left = " << wl); TDouble10Vec1Vec samples_; samples_.reserve(samples.size()); for (const auto& sample : samples) { samples_.push_back(sample.template toVector()); } - TDouble10Vec seedWeight(N, ns / s); - TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - double weight = (nl - ns) / s; - if (weight > 0.0) { - weights.assign(weights.size(), - TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + TDouble10VecWeightsAry1Vec weights(samples_.size(), + maths_t::countWeight(ws / n, N)); + modes.back().s_Prior->addSamples(samples_, weights); + if (wl > ws) { + weights.assign(weights.size(), maths_t::countWeight((wl - ws) / n, N)); + modes.back().s_Prior->addSamples(samples_, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } } @@ -993,24 +953,22 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - double nr = pRight * numberSamples; - double ns = std::min(nr, static_cast(N + 2)); - double s = static_cast(samples.size()); - LOG_TRACE(<< "# right = " << nr); + double wr = pRight * numberSamples; + double ws = std::min(wr, static_cast(N + 2)); + double n = static_cast(samples.size()); + LOG_TRACE(<< "# right = " 
<< wr); TDouble10Vec1Vec samples_; samples_.reserve(samples.size()); for (const auto& sample : samples) { samples_.push_back(sample.template toVector()); } - TDouble10Vec seedWeight(N, ns / s); - TDouble10Vec4Vec1Vec weights(samples_.size(), TDouble10Vec4Vec(1, seedWeight)); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); - double weight = (nr - ns) / s; - if (weight > 0.0) { - weights.assign(weights.size(), - TDouble10Vec4Vec(1, TDouble10Vec(N, weight))); - modes.back().s_Prior->addSamples(TWeights::COUNT, samples_, weights); + TDouble10VecWeightsAry1Vec weights(samples_.size(), + maths_t::countWeight(ws / n, N)); + modes.back().s_Prior->addSamples(samples_, weights); + if (wr > ws) { + weights.assign(weights.size(), maths_t::countWeight((wr - ws) / n, N)); + modes.back().s_Prior->addSamples(samples_, weights); LOG_TRACE(<< modes.back().s_Prior->print()); } } @@ -1106,6 +1064,7 @@ class CMultivariateMultimodalPrior : public CMultivariatePrior { //! Get the convariance matrix for the marginal likelihood. TMatrix covarianceMatrix() const { + // By linearity we have that: // Integral{ (x - m)' * (x - m) * Sum_i{ w(i) * f(x | i) } } // = Sum_i{ w(i) * (Integral{ x' * x * f(x | i) } - m' * m) } diff --git a/include/maths/CMultivariateNormalConjugate.h b/include/maths/CMultivariateNormalConjugate.h index 7396463c40..d90c28b695 100644 --- a/include/maths/CMultivariateNormalConjugate.h +++ b/include/maths/CMultivariateNormalConjugate.h @@ -187,21 +187,16 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { } //! No-op. - virtual void adjustOffset(const TWeightStyleVec& /*weightStyles*/, - const TDouble10Vec1Vec& /*samples*/, - const TDouble10Vec4Vec1Vec& /*weights*/) {} + virtual void adjustOffset(const TDouble10Vec1Vec& /*samples*/, + const TDouble10VecWeightsAry1Vec& /*weights*/) {} //! Update the prior with a collection of independent samples from the //! process. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. 
- virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { if (samples.empty()) { return; } @@ -209,7 +204,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { return; } - this->CMultivariatePrior::addSamples(weightStyles, samples, weights); + this->CMultivariatePrior::addSamples(samples, weights); // Note that if either count weight or Winsorisation weights are supplied // the weight of the sample x(i) is interpreted as its count, so for example @@ -242,19 +237,13 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { TPoint numberSamples(0.0); TCovariance covariancePost; - try { - for (std::size_t i = 0u; i < samples.size(); ++i) { - TPoint x(samples[i]); - TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint varianceScale = - TPoint(maths_t::seasonalVarianceScale(N, weightStyles, weights[i])) * - TPoint(maths_t::countVarianceScale(N, weightStyles, weights[i])); - numberSamples += n; - covariancePost.add(x, n / varianceScale); - } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + for (std::size_t i = 0u; i < samples.size(); ++i) { + TPoint x(samples[i]); + TPoint n(maths_t::countForUpdate(weights[i])); + TPoint varianceScale = TPoint(maths_t::seasonalVarianceScale(weights[i])) * + TPoint(maths_t::countVarianceScale(weights[i])); + numberSamples += n; + covariancePost.add(x, n / varianceScale); } TPoint scaledNumberSamples = covariancePost.s_Count; @@ -314,7 +303,6 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. return; @@ -367,6 +355,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! is univariate. virtual TUnivariatePriorPtrDoublePr univariate(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { + if (!this->check(marginalize, condition)) { return TUnivariatePriorPtrDoublePr(); } @@ -455,7 +444,6 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { if (N == 2) { return TPriorPtrDoublePr(std::shared_ptr(this->clone()), 0.0); } - if (!this->check(marginalize, condition)) { return TPriorPtrDoublePr(); } @@ -548,8 +536,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { } //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble10Vec4Vec& /*weights*/) const { + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& /*weights*/) const { return this->marginalLikelihoodMean(); } @@ -566,16 +553,12 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Calculate the log marginal likelihood function, integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. 
virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -608,8 +591,8 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { if (this->isInteger()) { double logLikelihood; - status = this->jointLogMarginalLikelihood( - weightStyles, samples, TPoint(0.5), weights, logLikelihood); + status = this->jointLogMarginalLikelihood(samples, TPoint(0.5), + weights, logLikelihood); if (status != maths_t::E_FpNoErrors) { return status; } @@ -622,7 +605,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { CSampling::uniformSample(0.0, 1.0, 3 * N, z); for (std::size_t i = 0u; i < z.size(); i += N) { status = this->jointLogMarginalLikelihood( - weightStyles, samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood); + samples, TPoint(&z[i], &z[i + N]), weights, logLikelihood); if (status & maths_t::E_FpFailed) { return maths_t::E_FpFailed; } @@ -640,8 +623,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { result = maxLogLikelihood + std::log(sum / n); } else { - status = this->jointLogMarginalLikelihood(weightStyles, samples, - TPoint(0.0), weights, result); + status = this->jointLogMarginalLikelihood(samples, TPoint(0.0), weights, result); } if (status & maths_t::E_FpFailed) { @@ -954,6 +936,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Get the covariance matrix for the marginal likelihood. TMatrix covarianceMatrix() const { + // This can be found by change of variables from the prior on the // precision matrix. In particular, if X ~ W_d(V, n) and Y = X^(-1), // then Y ~ W_d^(-1)(V^(-1), n), i.e. the inverse Wishart with the @@ -977,7 +960,6 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { if (this->isNonInformative()) { return TMatrix(0.0); } - TMatrix result(m_WishartScaleMatrix / m_WishartDegreesFreedom); return TMatrix(fromDenseMatrix(toDenseMatrix(result).inverse())); } @@ -986,6 +968,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { bool equalTolerance(const CMultivariateNormalConjugate& rhs, unsigned int toleranceType, double epsilon) const { + LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean); LOG_DEBUG(<< m_GaussianPrecision << " " << rhs.m_GaussianPrecision); LOG_DEBUG(<< m_WishartDegreesFreedom << " " << rhs.m_WishartDegreesFreedom); @@ -1044,18 +1027,16 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { //! Compute the marginal likelihood for \p samples at the offset //! \p offset. maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, const TPoint& offset, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { + // As usual, one can find the marginal likelihood by noting that // it is proportional to the ratio of the normalization factors // of the conjugate distribution before and after update with the // samples. 
- double d = static_cast(N); - double numberSamples = 0.0; TCovariance covariancePost; double logCountVarianceScales = 0.0; @@ -1063,11 +1044,10 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { TPoint m(this->marginalLikelihoodMean()); for (std::size_t i = 0u; i < samples.size(); ++i) { TPoint x(samples[i]); - TPoint n(maths_t::countForUpdate(N, weightStyles, weights[i])); - TPoint seasonalScale = sqrt(TPoint( - maths_t::seasonalVarianceScale(N, weightStyles, weights[i]))); - TPoint countVarianceScale( - maths_t::countVarianceScale(N, weightStyles, weights[i])); + TPoint n(maths_t::countForUpdate(weights[i])); + TPoint seasonalScale = + sqrt(TPoint(maths_t::seasonalVarianceScale(weights[i]))); + TPoint countVarianceScale(maths_t::countVarianceScale(weights[i])); x = m + (x + offset - m) / seasonalScale; numberSamples += this->smallest(n.template toVector()); covariancePost.add(x, n / countVarianceScale); @@ -1129,6 +1109,7 @@ class CMultivariateNormalConjugate : public CMultivariatePrior { LOG_TRACE(<< "logGammaPostMinusPrior = " << logGammaPostMinusPrior); LOG_TRACE(<< "logCountVarianceScales = " << logCountVarianceScales); + double d = static_cast(N); result = 0.5 * (wishartDegreesFreedomPrior * logDeterminantPrior - wishartDegreesFreedomPost * logDeterminantPost - d * (logGaussianPrecisionPost - logGaussianPrecisionPrior) + diff --git a/include/maths/CMultivariateOneOfNPrior.h b/include/maths/CMultivariateOneOfNPrior.h index 25e4fad103..bbb6e64cdb 100644 --- a/include/maths/CMultivariateOneOfNPrior.h +++ b/include/maths/CMultivariateOneOfNPrior.h @@ -158,21 +158,16 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0); //! Forward the offset to the model priors. - virtual void adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! Update the model weights using the marginal likelihoods for //! the data. The component prior parameters are then updated. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights); + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -231,24 +226,19 @@ class MATHS_EXPORT CMultivariateOneOfNPrior : public CMultivariatePrior { virtual TDouble10Vec marginalLikelihoodVariances() const; //! Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the distribution parameters. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. 
//! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. diff --git a/include/maths/CMultivariatePrior.h b/include/maths/CMultivariatePrior.h index d94cca82af..ba68929017 100644 --- a/include/maths/CMultivariatePrior.h +++ b/include/maths/CMultivariatePrior.h @@ -47,19 +47,19 @@ class MATHS_EXPORT CMultivariatePrior { using TDouble10Vec = core::CSmallVector; using TDouble10Vec1Vec = core::CSmallVector; using TDouble10Vec2Vec = core::CSmallVector; - using TDouble10Vec4Vec = core::CSmallVector; using TDouble10Vec10Vec = core::CSmallVector; - using TDouble10Vec4Vec1Vec = core::CSmallVector; using TDouble10VecDouble10VecPr = std::pair; using TSize10Vec = core::CSmallVector; using TSizeDoublePr = std::pair; using TSizeDoublePr10Vec = core::CSmallVector; - using TWeightStyleVec = maths_t::TWeightStyleVec; using TTail10Vec = core::CSmallVector; + using TDouble10VecWeightsAry = maths_t::TDouble10VecWeightsAry; + using TDouble10VecWeightsAry1Vec = maths_t::TDouble10VecWeightsAry1Vec; using TUnivariatePriorPtr = std::shared_ptr; using TUnivariatePriorPtrDoublePr = std::pair; using TPriorPtr = std::shared_ptr; using TPriorPtrDoublePr = std::pair; + using TWeights = maths_t::CUnitWeights; public: //! The value of the decay rate to fall back to using if the input @@ -137,26 +137,18 @@ class MATHS_EXPORT CMultivariatePrior { //! For priors with non-negative support this adjusts the offset used //! to extend the support to handle negative samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. - virtual void adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) = 0; + virtual void adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) = 0; //! Update the prior with a collection of independent samples from the //! process. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) = 0; + virtual void addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) = 0; //! Update the prior for the specified elapsed time. virtual void propagateForwardsByTime(double time) = 0; @@ -202,12 +194,11 @@ class MATHS_EXPORT CMultivariatePrior { virtual TDouble10Vec nearestMarginalLikelihoodMean(const TDouble10Vec& value) const; //! 
Get the mode of the marginal likelihood function. - virtual TDouble10Vec marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const = 0; + virtual TDouble10Vec + marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const = 0; //! Get the local maxima of the marginal likelihood function. - virtual TDouble10Vec1Vec marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const; + virtual TDouble10Vec1Vec marginalLikelihoodModes(const TDouble10VecWeightsAry& weights) const; //! Get the covariance matrix for the marginal likelihood. virtual TDouble10Vec10Vec marginalLikelihoodCovariance() const = 0; @@ -218,16 +209,12 @@ class MATHS_EXPORT CMultivariatePrior { //! Calculate the log marginal likelihood function, integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const = 0; //! Sample the marginal likelihood function. @@ -258,9 +245,6 @@ class MATHS_EXPORT CMultivariatePrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[in] coordinates The coordinates for which to compute probabilities. @@ -275,9 +259,8 @@ class MATHS_EXPORT CMultivariatePrior { //! a value of zero is not well defined and a value of infinity is not well //! handled. (Very large values are handled though.) bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, const TSize10Vec& coordinates, TDouble10Vec2Vec& lowerBounds, TDouble10Vec2Vec& upperBounds, @@ -289,9 +272,6 @@ class MATHS_EXPORT CMultivariatePrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the process. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound for the probability @@ -307,9 +287,8 @@ class MATHS_EXPORT CMultivariatePrior { //! a value of zero is not well defined and a value of infinity is not well //! handled. (Very large values are handled though.) 
bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, double& lowerBound, double& upperBound, TTail10Vec& tail) const; @@ -400,7 +379,7 @@ class MATHS_EXPORT CMultivariatePrior { void addSamples(double n); //! Check that the samples and weights are consistent. - bool check(const TDouble10Vec1Vec& samples, const TDouble10Vec4Vec1Vec& weights) const; + bool check(const TDouble10Vec1Vec& samples, const TDouble10VecWeightsAry1Vec& weights) const; //! Check that the variables to marginalize out and condition on //! are consistent. diff --git a/include/maths/CNormalMeanPrecConjugate.h b/include/maths/CNormalMeanPrecConjugate.h index dd2d3f3c5a..0b461629ff 100644 --- a/include/maths/CNormalMeanPrecConjugate.h +++ b/include/maths/CNormalMeanPrecConjugate.h @@ -130,9 +130,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { virtual bool needsOffset() const; //! No-op. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Returns zero. virtual double offset() const; @@ -140,14 +139,9 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! Update the prior with a collection of independent samples from //! the normal variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -166,14 +160,11 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -185,29 +176,23 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). 
- virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the normal mean and precision. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -222,9 +207,6 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. For //! the count variance scale weight style the weight is interpreted as @@ -245,9 +227,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! \f$(0,\infty)\f$, i.e. a value of zero is not well defined and //! a value of infinity is not well handled. (Very large values are //! handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -257,9 +238,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -268,9 +248,6 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights. See minusLogJointCdf for discussion. //! \param[out] lowerBound Filled in with the probability of the set @@ -284,9 +261,8 @@ class MATHS_EXPORT CNormalMeanPrecConjugate : public CPrior { //! i.e. 
a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/COneOfNPrior.h b/include/maths/COneOfNPrior.h index a94eca659b..6fc63e2953 100644 --- a/include/maths/COneOfNPrior.h +++ b/include/maths/COneOfNPrior.h @@ -142,9 +142,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Forward the offset to the model priors. //! //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the maximum model offset. virtual double offset() const; @@ -152,14 +151,9 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Update the model weights using the marginal likelihoods for //! the data. The component prior parameters are then updated. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -181,14 +175,11 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { virtual double nearestMarginalLikelihoodMean(double value) const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -200,29 +191,23 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range (0.0, 100.0]. - virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! 
over the prior density function for the distribution parameters. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -240,9 +225,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { private: //! The common c.d.f. implementation. bool minusLogJointCdfImpl(bool complement, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -250,9 +234,6 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound to acceptable @@ -265,9 +246,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! \warning The variance scales must be in the range \f$(0,\infty)\f$, //! i.e. a value of zero is not well defined and a value of infinity is //! not well handled. (Very large values are handled though.) - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -277,9 +257,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -288,9 +267,6 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with the probability of the set @@ -304,9 +280,8 @@ class MATHS_EXPORT COneOfNPrior : public CPrior { //! i.e. a value of zero is not well defined and a value of infinity is //! not well handled. 
(Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CPoissonMeanConjugate.h b/include/maths/CPoissonMeanConjugate.h index 40c46eb343..ae292e7db9 100644 --- a/include/maths/CPoissonMeanConjugate.h +++ b/include/maths/CPoissonMeanConjugate.h @@ -113,15 +113,11 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! This samples the current marginal likelihood and uses these samples //! to reconstruct the prior with adjusted offset. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights); //! Get the current offset. virtual double offset() const; @@ -129,14 +125,9 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! Update the prior with a collection of independent samples from the //! Poisson variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + virtual void addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Propagate the prior density function forwards by \p time. //! @@ -154,14 +145,11 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { virtual double marginalLikelihoodMean() const; //! Get the mode of the marginal likelihood function. - virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual double marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -173,29 +161,23 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). 
- virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( - double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + virtual TDoubleDoublePr + marginalLikelihoodConfidenceInterval(double percentage, + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Compute the log marginal likelihood function at \p samples integrating //! over the prior density function for the Poisson mean. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. //! \note The samples are assumed to be independent and identically //! distributed. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const; //! Sample the marginal likelihood function. @@ -210,18 +192,14 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! Compute minus the log of the joint c.d.f. of the marginal likelihood //! at \p samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with \f$-\log(\prod_i{F(x_i)})\f$ //! where \f$F(.)\f$ is the c.d.f. and \f$\{x_i\}\f$ are the samples. //! \param[out] upperBound Equal to \p lowerBound. //! \note The samples are assumed to be independent. - virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -231,9 +209,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const; @@ -242,9 +219,6 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples of interest. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with the probability of the set @@ -255,9 +229,8 @@ class MATHS_EXPORT CPoissonMeanConjugate : public CPrior { //! are in or neither. //! \note The samples are assumed to be independent. 
virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const; diff --git a/include/maths/CPrior.h b/include/maths/CPrior.h index 35ff1591d4..2f814c1ba3 100644 --- a/include/maths/CPrior.h +++ b/include/maths/CPrior.h @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -51,11 +50,10 @@ class MATHS_EXPORT CPrior { using TDoubleVecVec = std::vector; using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; - using TWeightStyleVec = maths_t::TWeightStyleVec; using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TWeights = CConstantWeights; + using TDoubleWeightsAry = maths_t::TDoubleWeightsAry; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; + using TWeights = maths_t::CUnitWeights; //! \brief Data for plotting a series struct MATHS_EXPORT SPlot { @@ -102,16 +100,14 @@ class MATHS_EXPORT CPrior { public: CLogMarginalLikelihood(const CPrior& prior, - const TWeightStyleVec& weightStyles = CConstantWeights::COUNT, - const TDouble4Vec1Vec& weights = CConstantWeights::SINGLE_UNIT); + const TDoubleWeightsAry1Vec& weights = TWeights::SINGLE_UNIT); double operator()(double x) const; bool operator()(double x, double& result) const; private: const CPrior* m_Prior; - const TWeightStyleVec* m_WeightStyles; - const TDouble4Vec1Vec* m_Weights; + const TDoubleWeightsAry1Vec* m_Weights; //! Avoids creating the vector argument to jointLogMarginalLikelihood //! more than once. mutable TDouble1Vec m_X; @@ -187,15 +183,11 @@ class MATHS_EXPORT CPrior { //! For priors with non-negative support this adjusts the offset used //! to extend the support to handle negative samples. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples The samples from which to determine the offset. //! \param[in] weights The weights of each sample in \p samples. //! \return The penalty to apply in model selection. - virtual double adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) = 0; + virtual double adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) = 0; //! Get the current sample offset. virtual double offset() const = 0; @@ -203,14 +195,10 @@ class MATHS_EXPORT CPrior { //! Update the prior with a collection of independent samples from the //! variable. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. - virtual void addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) = 0; + virtual void addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) = 0; //! Update the prior for the specified elapsed time. virtual void propagateForwardsByTime(double time) = 0; @@ -227,13 +215,11 @@ class MATHS_EXPORT CPrior { //! Get the mode of the marginal likelihood function. 
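As a hedged sketch of the two-argument update path introduced here (the function, sample values and counts are hypothetical, and initializer-list construction of the small vectors is assumed to work as elsewhere in these headers):

#include <maths/CPrior.h>
#include <maths/MathsTypes.h>

namespace {
// Add one sample with an effective count of 3 and one unit-weighted sample,
// then evaluate the joint log marginal likelihood with the same weights.
void addWeightedSamples(ml::maths::CPrior& prior) {
    ml::maths::CPrior::TDouble1Vec samples{5.0, 7.0};
    ml::maths_t::TDoubleWeightsAry1Vec weights{ml::maths_t::countWeight(3.0),
                                               ml::maths_t::CUnitWeights::UNIT};
    prior.addSamples(samples, weights);

    double logLikelihood{0.0};
    prior.jointLogMarginalLikelihood(samples, weights, logLikelihood);
}
}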
virtual double - marginalLikelihoodMode(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + marginalLikelihoodMode(const TDoubleWeightsAry& weights = TWeights::UNIT) const = 0; //! Get the local maxima of the marginal likelihood function. virtual TDouble1Vec - marginalLikelihoodModes(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + marginalLikelihoodModes(const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the \p percentage symmetric confidence interval for the marginal //! likelihood function, i.e. the values \f$a\f$ and \f$b\f$ such that: @@ -245,32 +231,25 @@ class MATHS_EXPORT CPrior { //! the percentage of interest \p percentage. //! //! \param[in] percentage The percentage of interest. - //! \param[in] weightStyles Optional variance scale weight styles. //! \param[in] weights Optional variance scale weights. //! \note \p percentage should be in the range [0.0, 100.0). virtual TDoubleDoublePr marginalLikelihoodConfidenceInterval( double percentage, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + const TDoubleWeightsAry& weights = TWeights::UNIT) const = 0; //! Get the variance of the marginal likelihood. virtual double - marginalLikelihoodVariance(const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const = 0; + marginalLikelihoodVariance(const TDoubleWeightsAry& weights = TWeights::UNIT) const = 0; //! Calculate the log marginal likelihood function integrating over the //! prior density function. //! - //! \param[in] weightStyles Controls the interpretation of the weight(s) - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] result Filled in with the joint likelihood of \p samples. virtual maths_t::EFloatingPointErrorStatus - jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const = 0; //! Sample the marginal likelihood function. @@ -296,9 +275,6 @@ class MATHS_EXPORT CPrior { //! Calculate minus the log of the joint c.d.f. of the marginal likelihood //! for a collection of independent samples from the variable. //! - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound for @@ -314,9 +290,8 @@ class MATHS_EXPORT CPrior { //! \warning The variance scales must be in the range \f$(0,\infty)\f$, //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) 
- virtual bool minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const = 0; @@ -326,9 +301,8 @@ class MATHS_EXPORT CPrior { //! can return is the minimum double rather than epsilon. //! //! \see minusLogJointCdf for more details. - virtual bool minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + virtual bool minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const = 0; @@ -338,9 +312,6 @@ class MATHS_EXPORT CPrior { //! //! \param[in] calculation The style of the probability calculation //! (see model_t::EProbabilityCalculation for details). - //! \param[in] weightStyles Controls the interpretation of the weights - //! that are associated with each sample. See maths_t::ESampleWeightStyle - //! for more details. //! \param[in] samples A collection of samples of the variable. //! \param[in] weights The weights of each sample in \p samples. //! \param[out] lowerBound Filled in with a lower bound for the probability @@ -359,9 +330,8 @@ class MATHS_EXPORT CPrior { //! i.e. a value of zero is not well defined and a value of infinity //! is not well handled. (Very large values are handled though.) virtual bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const = 0; @@ -449,8 +419,7 @@ class MATHS_EXPORT CPrior { bool expectation(const F& f, const std::size_t numberIntervals, T& result, - const TWeightStyleVec& weightStyles = TWeights::COUNT_VARIANCE, - const TDouble4Vec& weights = TWeights::UNIT) const; + const TDoubleWeightsAry& weights = TWeights::UNIT) const; //! Get the number of samples received to date. double numberSamples() const; @@ -474,7 +443,7 @@ class MATHS_EXPORT CPrior { //! Get a set of sample for the prior to use in adjust offset. void adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, - TDouble4Vec1Vec& resamplesWeights) const; + TDoubleWeightsAry1Vec& resamplesWeights) const; protected: //! \brief Defines a set of operations to adjust the offset parameter @@ -485,28 +454,24 @@ class MATHS_EXPORT CPrior { virtual ~COffsetParameters() = default; //! Add a collection of samples. - void samples(const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights); + void samples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights); //! Capture a collection of re-samples from the prior. 
virtual void resample(double minimumSample); protected: CPrior& prior() const; - const maths_t::TWeightStyleVec& weightStyles() const; const TDouble1Vec& samples() const; - const TDouble4Vec1Vec& weights() const; + const TDoubleWeightsAry1Vec& weights() const; const TDouble1Vec& resamples() const; - const TDouble4Vec1Vec& resamplesWeights() const; + const TDoubleWeightsAry1Vec& resamplesWeights() const; private: CPrior* m_Prior; - const maths_t::TWeightStyleVec* m_WeightStyles; const TDouble1Vec* m_Samples; - const TDouble4Vec1Vec* m_Weights; + const TDoubleWeightsAry1Vec* m_Weights; TDouble1Vec m_Resamples; - TDouble4Vec1Vec m_ResamplesWeights; + TDoubleWeightsAry1Vec m_ResamplesWeights; }; //! \brief Computes the likelihood of a collection of samples and @@ -548,9 +513,8 @@ class MATHS_EXPORT CPrior { //! specified reward. //! //! \return The penalty to apply to the model in selection. - double adjustOffsetWithCost(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + double adjustOffsetWithCost(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, COffsetCost& cost, CApplyOffset& apply); diff --git a/include/maths/CPriorDetail.h b/include/maths/CPriorDetail.h index 74fa0ff8d1..5c6486092f 100644 --- a/include/maths/CPriorDetail.h +++ b/include/maths/CPriorDetail.h @@ -39,8 +39,8 @@ template bool CPrior::expectation(const F& f, std::size_t numberIntervals, T& result, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weight) const { + const TDoubleWeightsAry& weight) const { + if (numberIntervals == 0) { LOG_ERROR(<< "Must specify non-zero number of intervals"); return false; @@ -49,14 +49,14 @@ bool CPrior::expectation(const F& f, result = T(); double n = static_cast(numberIntervals); - TDoubleDoublePr interval = this->marginalLikelihoodConfidenceInterval( - 100.0 - 1.0 / (100.0 * n), weightStyles, weight); + TDoubleDoublePr interval = + this->marginalLikelihoodConfidenceInterval(100.0 - 1.0 / (100.0 * n), weight); double x = interval.first; double dx = (interval.second - interval.first) / n; double normalizationFactor = 0.0; - TDouble4Vec1Vec weights(1, weight); - CPrior::CLogMarginalLikelihood logLikelihood(*this, weightStyles, weights); + TDoubleWeightsAry1Vec weights{weight}; + CPrior::CLogMarginalLikelihood logLikelihood(*this, weights); CCompositeFunctions::CExp likelihood(logLikelihood); for (std::size_t i = 0u; i < numberIntervals; ++i, x += dx) { T productIntegral; diff --git a/include/maths/CTimeSeriesDecomposition.h b/include/maths/CTimeSeriesDecomposition.h index 74dee3f646..b64178291b 100644 --- a/include/maths/CTimeSeriesDecomposition.h +++ b/include/maths/CTimeSeriesDecomposition.h @@ -107,9 +107,6 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! //! \param[in] time The time of the function point. //! \param[in] value The function value at \p time. - //! \param[in] weightStyles The styles of \p weights. Both the count - //! and the Winsorisation weight styles have an effect. See also - //! maths_t::ESampleWeightStyle for more details. //! \param[in] weights The weights of \p value. The smaller //! the count weight the less influence \p value has on the trend //! and it's local variance. @@ -117,8 +114,7 @@ class MATHS_EXPORT CTimeSeriesDecomposition : public CTimeSeriesDecompositionInt //! and false otherwise. 
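A small illustrative sketch of the reworked CLogMarginalLikelihood functor, mirroring its usage in CPriorDetail.h above (the wrapper function and the 1.5 scale are assumptions for illustration):

#include <maths/CPrior.h>
#include <maths/MathsTypes.h>

namespace {
// Evaluate the log marginal likelihood at x with a count variance scale of 1.5.
// The functor now takes one weights vector, defaulting to CUnitWeights::SINGLE_UNIT.
double logLikelihoodAt(const ml::maths::CPrior& prior, double x) {
    ml::maths_t::TDoubleWeightsAry1Vec weights{ml::maths_t::countVarianceScaleWeight(1.5)};
    ml::maths::CPrior::CLogMarginalLikelihood logLikelihood{prior, weights};
    double result{0.0};
    logLikelihood(x, result);
    return result;
}
}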
virtual bool addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec& weights = TWeights::UNIT); + const maths_t::TDoubleWeightsAry& weights = TWeights::UNIT); //! Propagate the decomposition forwards to \p time. void propagateForwardsTo(core_t::TTime time); diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index 45eabfe085..dadd1d2bda 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -63,8 +63,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { SAddValue(core_t::TTime time, core_t::TTime lastTime, double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights, + const maths_t::TDoubleWeightsAry& weights, double trend, double seasonal, double calendar, @@ -73,10 +72,8 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! The value to add. double s_Value; - //! The styles of the weights. - const maths_t::TWeightStyleVec& s_WeightStyles; //! The weights of associated with the value. - const maths_t::TDouble4Vec& s_Weights; + const maths_t::TDoubleWeightsAry& s_Weights; //! The trend component prediction at the value's time. double s_Trend; //! The seasonal component prediction at the value's time. diff --git a/include/maths/CTimeSeriesDecompositionInterface.h b/include/maths/CTimeSeriesDecompositionInterface.h index b66a465fb6..f76b4f34e2 100644 --- a/include/maths/CTimeSeriesDecompositionInterface.h +++ b/include/maths/CTimeSeriesDecompositionInterface.h @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -44,8 +43,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface { public: using TDouble3Vec = core::CSmallVector; using TDouble3VecVec = std::vector; - using TDoubleAry = boost::array; - using TWeights = CConstantWeights; + using TWeights = maths_t::CUnitWeights; //! The components of the decomposition. enum EComponents { @@ -79,9 +77,6 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface { //! //! \param[in] time The time of the function point. //! \param[in] value The function value at \p time. - //! \param[in] weightStyles The styles of \p weights. Both the - //! count and the Winsorisation weight styles have an effect. - //! See maths_t::ESampleWeightStyle for more details. //! \param[in] weights The weights of \p value. The smaller //! the product count weight the less influence \p value has //! on the trend and it's local variance. @@ -89,8 +84,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionInterface { //! and false otherwise. virtual bool addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec& weights = TWeights::UNIT) = 0; + const maths_t::TDoubleWeightsAry& weights = TWeights::UNIT) = 0; //! Propagate the decomposition forwards to \p time. virtual void propagateForwardsTo(core_t::TTime time) = 0; diff --git a/include/maths/CTimeSeriesDecompositionStub.h b/include/maths/CTimeSeriesDecompositionStub.h index b0d4d3b8a9..824f706faf 100644 --- a/include/maths/CTimeSeriesDecompositionStub.h +++ b/include/maths/CTimeSeriesDecompositionStub.h @@ -46,8 +46,7 @@ class MATHS_EXPORT CTimeSeriesDecompositionStub : public CTimeSeriesDecompositio //! No-op returning false. 
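By way of a usage sketch for the simplified addPoint signature (the helper and the 0.1 weight are illustrative assumptions):

#include <core/CoreTypes.h>
#include <maths/CTimeSeriesDecompositionInterface.h>
#include <maths/MathsTypes.h>

namespace {
// Add a heavily Winsorised point so an outlying value has little influence
// on the trend; unit weights remain the default when the argument is omitted.
bool addOutlier(ml::maths::CTimeSeriesDecompositionInterface& decomposition,
                ml::core_t::TTime time,
                double value) {
    return decomposition.addPoint(time, value, ml::maths_t::winsorisationWeight(0.1));
}
}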
virtual bool addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles = TWeights::COUNT, - const maths_t::TDouble4Vec& weights = TWeights::UNIT); + const maths_t::TDoubleWeightsAry& weights = TWeights::UNIT); //! No-op. virtual void propagateForwardsTo(core_t::TTime time); diff --git a/include/maths/CTimeSeriesModel.h b/include/maths/CTimeSeriesModel.h index 2751b9d097..fcb584ca76 100644 --- a/include/maths/CTimeSeriesModel.h +++ b/include/maths/CTimeSeriesModel.h @@ -95,19 +95,15 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { virtual void skipTime(core_t::TTime gap); //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const; //! Get the most likely value for each correlate time series at //! \p time, if there are any. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const; //! Remove any trend components from \p value. virtual void detrend(const TTime2Vec1Vec& time, @@ -123,8 +119,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { //! confidence interval for the time series at \p time. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + const TDouble2VecWeightsAry& weights) const; //! Forecast the time series and get its \p confidenceInterval //! percentage confidence interval between \p startTime and @@ -187,7 +182,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { private: using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; using TVector = CVectorNx1; using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TDecayRateController2AryPtr = std::shared_ptr; @@ -203,9 +198,8 @@ class MATHS_EXPORT CUnivariateTimeSeriesModel : public CModel { CUnivariateTimeSeriesModel(const CUnivariateTimeSeriesModel& other, std::size_t id); //! Update the trend with \p samples. - EUpdateResult updateTrend(const maths_t::TWeightStyleVec& trendStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& trendWeights); + EUpdateResult updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& trendWeights); //! Compute the prediction errors for \p sample. 
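A hedged sketch of querying the model accessors with the new fixed-size weights array (the function is hypothetical and assumes the univariate model has dimension one):

#include <maths/CTimeSeriesModel.h>
#include <maths/MathsTypes.h>

#include <cstddef>

namespace {
// Report the most likely value and the central 95% confidence interval at one
// time point using unit weights built for the series' dimension.
void describe(const ml::maths::CUnivariateTimeSeriesModel& model, ml::core_t::TTime time) {
    std::size_t dimension{1};
    auto weights = ml::maths_t::CUnitWeights::unit<ml::maths_t::TDouble2Vec>(dimension);
    auto mostLikely = model.mode(time, weights);
    auto interval = model.confidenceInterval(time, 95.0, weights);
    (void)mostLikely;
    (void)interval;
}
}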
void appendPredictionErrors(double interval, double sample, TDouble1VecVec (&result)[2]); @@ -296,8 +290,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { using TTime1Vec = core::CSmallVector; using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TSize1Vec = core::CSmallVector; using TSizeSize1VecUMap = boost::unordered_map; using TSize2Vec = core::CSmallVector; @@ -322,7 +315,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { //! The tags for each sample. TSize1Vec s_Tags; //! The sample weights. - TDouble4Vec1Vec s_Weights; + TDoubleWeightsAry1Vec s_Weights; //! The interval by which to age the prior. double s_Interval; //! The prior decay rate multiplier. @@ -345,7 +338,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { //! //! \note This should be called exactly once after every univariate //! time series model has added its samples. - void processSamples(const maths_t::TWeightStyleVec& weightStyles); + void processSamples(); //! Refresh the models to account for any changes to the correlation //! estimates. @@ -408,7 +401,7 @@ class MATHS_EXPORT CTimeSeriesCorrelations { void addSamples(std::size_t id, maths_t::EDataType type, const TTimeDouble2VecSizeTrVec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double interval, double multiplier); @@ -510,18 +503,14 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { virtual void skipTime(core_t::TTime gap); //! Get the most likely value for the time series at \p time. - virtual TDouble2Vec mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const; //! Returns empty. - virtual TDouble2Vec1Vec correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights) const; + virtual TDouble2Vec1Vec + correlateModes(core_t::TTime time, const TDouble2VecWeightsAry1Vec& weights) const; //! Get the local maxima of the residual distribution. - virtual TDouble2Vec1Vec residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + virtual TDouble2Vec1Vec residualModes(const TDouble2VecWeightsAry& weights) const; //! Remove any trend components from \p value. virtual void detrend(const TTime2Vec1Vec& time, @@ -537,8 +526,7 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { //! confidence interval for the time series at \p time. virtual TDouble2Vec3Vec confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights) const; + const TDouble2VecWeightsAry& weights) const; //! Not currently supported. virtual bool forecast(core_t::TTime startTime, @@ -599,7 +587,7 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { private: using TDouble1Vec = core::CSmallVector; using TDouble1VecVec = std::vector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; using TVector = CVectorNx1; using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TDecayRateController2AryPtr = std::shared_ptr; @@ -608,9 +596,8 @@ class MATHS_EXPORT CMultivariateTimeSeriesModel : public CModel { private: //! Update the trend with \p samples. 
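For the multivariate case, a sketch (hypothetical helper, arbitrary 0.5 count weight) of building a per-dimension weight before calling the residualModes accessor declared above:

#include <maths/CTimeSeriesModel.h>
#include <maths/MathsTypes.h>

#include <cstddef>

namespace {
// Count the residual modes when every coordinate of the sample carries a
// count weight of 0.5.
std::size_t numberResidualModes(const ml::maths::CMultivariateTimeSeriesModel& model,
                                std::size_t dimension) {
    auto weights = ml::maths_t::CUnitWeights::unit<ml::maths_t::TDouble2Vec>(dimension);
    ml::maths_t::setCount(ml::maths_t::TDouble2Vec(dimension, 0.5), weights);
    return model.residualModes(weights).size();
}
}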
- EUpdateResult updateTrend(const maths_t::TWeightStyleVec& trendStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& trendWeights); + EUpdateResult updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& trendWeights); //! Compute the prediction errors for \p sample. void appendPredictionErrors(double interval, diff --git a/include/maths/Constants.h b/include/maths/Constants.h index 2c968a4cf2..88a88218ab 100644 --- a/include/maths/Constants.h +++ b/include/maths/Constants.h @@ -94,40 +94,6 @@ const double MAXIMUM_ACCURATE_VARIANCE_SCALE{2.0}; //! can be in significant error). const double DEFAULT_SEASONAL_CONFIDENCE_INTERVAL{50.0}; -//! \brief A collection of weight styles and weights. -class MATHS_EXPORT CConstantWeights { -public: - using TDouble2Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; - using TDouble2Vec4Vec1Vec = core::CSmallVector; - -public: - //! A single count weight style. - static const maths_t::TWeightStyleVec COUNT; - //! A single count variance weight style. - static const maths_t::TWeightStyleVec COUNT_VARIANCE; - //! A single seasonal variance weight style. - static const maths_t::TWeightStyleVec SEASONAL_VARIANCE; - //! A unit weight. - static const TDouble4Vec UNIT; - //! A single unit weight. - static const TDouble4Vec1Vec SINGLE_UNIT; - //! Get a unit weight for data with \p dimension. - template - static core::CSmallVector unit(std::size_t dimension) { - return TDouble2Vec4Vec{VECTOR(dimension, 1.0)}; - } - //! Get a single unit weight for data with \p dimension. - template - static core::CSmallVector, 1> - singleUnit(std::size_t dimension) { - return core::CSmallVector, 1>{ - core::CSmallVector{VECTOR(dimension, 1.0)}}; - } -}; - //! The minimum fractional count of points in a cluster. const double MINIMUM_CLUSTER_SPLIT_FRACTION{0.0}; diff --git a/include/maths/MathsTypes.h b/include/maths/MathsTypes.h index 7b75a7cb59..867e44e15d 100644 --- a/include/maths/MathsTypes.h +++ b/include/maths/MathsTypes.h @@ -21,6 +21,9 @@ #include +#include + +#include #include #include @@ -33,11 +36,8 @@ class CSeasonalComponent; namespace maths_t { using TDoubleDoublePr = std::pair; -using TDouble4Vec = core::CSmallVector; +using TDouble2Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; using TSeasonalComponentVec = std::vector; using TCalendarComponentVec = std::vector; @@ -80,86 +80,307 @@ enum ESampleWeightStyle { //! IMPORTANT: this must be kept this up-to-date with ESampleWeightStyle. const std::size_t NUMBER_WEIGHT_STYLES = 4; -using TWeightStyleVec = core::CSmallVector; +template +using TWeightsAry = boost::array; +using TDoubleWeightsAry = TWeightsAry; +using TDoubleWeightsAry1Vec = core::CSmallVector; +using TDouble2VecWeightsAry = TWeightsAry; +using TDouble2VecWeightsAry1Vec = core::CSmallVector; +using TDouble10VecWeightsAry = TWeightsAry; +using TDouble10VecWeightsAry1Vec = core::CSmallVector; -//! Extract the effective sample count from a collection of weights. +namespace maths_types_detail { + +//! \brief Constructs a unit weight. +template +struct SUnitWeightFactory { + static std::size_t dimension(const VECTOR& weight) { return weight.size(); } + static VECTOR weight(std::size_t dimension) { + return VECTOR(dimension, 1.0); + } +}; +//! 
\brief Constructs a unit weight. +template<> +struct SUnitWeightFactory { + static std::size_t dimension(double) { return 1; } + static double weight(std::size_t) { return 1.0; } +}; + +//! \brief Add two weights. +template +struct SWeightArithmetic { + static void add(const VECTOR& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < lhs.size(); ++i) { + rhs[i] += lhs[i]; + } + } + static void multiply(const VECTOR& lhs, VECTOR& rhs) { + for (std::size_t i = 0u; i < lhs.size(); ++i) { + rhs[i] *= lhs[i]; + } + } +}; +//! \brief Add two weights. +template<> +struct SWeightArithmetic { + static void add(double lhs, double& rhs) { rhs += lhs; } + static void multiply(double lhs, double& rhs) { rhs *= lhs; } +}; +} + +//! \brief A collection of weight styles and weights. +class MATHS_EXPORT CUnitWeights { +public: + //! A unit weight. + static const TDoubleWeightsAry UNIT; + //! A single unit weight. + static const TDoubleWeightsAry1Vec SINGLE_UNIT; + //! Get a conformable unit weight for \p weight. + template + static TWeightsAry unit(const VECTOR& weight) { + return unit(maths_types_detail::SUnitWeightFactory::dimension(weight)); + } + //! Get a unit weight for data with \p dimension. + template + static TWeightsAry unit(std::size_t dimension) { + TWeightsAry result; + result.fill(maths_types_detail::SUnitWeightFactory::weight(dimension)); + return result; + } + //! Get a single conformable unit weight for \p weight. + template + static core::CSmallVector, 1> singleUnit(const VECTOR& weight) { + return {unit(weight)}; + } + //! Get a single unit weight for data with \p dimension. + template + static core::CSmallVector, 1> singleUnit(std::size_t dimension) { + return {unit(dimension)}; + } +}; + +//! Get a weights array with count weight \p weight. +template +TWeightsAry countWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleCountWeight] = weight; + return result; +} + +//! Get a weights array with count weight \p weight. MATHS_EXPORT -double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +TDoubleWeightsAry countWeight(double weight); -//! Extract the effective sample count from a collection of weights. +//! Get a weights array with count weight \p weight. +MATHS_EXPORT +TDouble10VecWeightsAry countWeight(double weight, std::size_t dimension); + +//! Set the count weight in \p weights to \p weight. +template +void setCount(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleCountWeight] = weight; +} + +//! Set the count weight in \p weights to \p weight. MATHS_EXPORT -TDouble10Vec count(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +void setCount(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); + +//! Add \p weight to the count weight of \p weights. +template +void addCount(const VECTOR& weight, TWeightsAry& weights) { + maths_types_detail::SWeightArithmetic::add(weight, weights[E_SampleCountWeight]); +} + +//! Extract the effective sample count from a collection of weights. +template +const VECTOR& count(const TWeightsAry& weights) { + return weights[E_SampleCountWeight]; +} //! Extract the effective sample count with which to update a model //! from a collection of weights. MATHS_EXPORT -double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +double countForUpdate(const TDoubleWeightsAry& weights); //! Extract the effective sample count with which to update a model //! from a collection of weights. 
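A brief, self-contained sketch of the count-weight helpers introduced here (the values are arbitrary; the point is that a weights array is a fixed-size array indexed by ESampleWeightStyle):

#include <maths/MathsTypes.h>

namespace {
void countWeightBasics() {
    using namespace ml;

    // Scalar data: start from a count weight of 2 and accumulate 3 more.
    maths_t::TDoubleWeightsAry scalar = maths_t::countWeight(2.0);
    maths_t::addCount(3.0, scalar);
    double effectiveCount{maths_t::count(scalar)}; // 5.0

    // Multivariate data: a 3-dimensional weight with the count reset to 0.5.
    maths_t::TDouble10VecWeightsAry multivariate = maths_t::countWeight(1.0, 3);
    maths_t::setCount(0.5, 3, multivariate);

    (void)effectiveCount;
    (void)multivariate;
}
}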
MATHS_EXPORT -TDouble10Vec countForUpdate(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +TDouble10Vec countForUpdate(const TDouble10VecWeightsAry& weights); -//! Extract the variance scale from a collection of weights. +//! Get a weights array with Winsorisation weight \p weight. +template +TWeightsAry winsorisationWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleWinsorisationWeight] = weight; + return result; +} + +//! Get a weights array with Winsorisation weight \p weight. MATHS_EXPORT -double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +TDoubleWeightsAry winsorisationWeight(double weight); -//! Extract the variance scale from a collection of weights. +//! Get a weights array with Winsorisation weight \p weight. MATHS_EXPORT -TDouble10Vec seasonalVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +TDouble10VecWeightsAry winsorisationWeight(double weight, std::size_t dimension); -//! Extract the variance scale from a collection of weights. +//! Set the Winsorisation weight in \p weights to \p weight. +template +void setWinsorisationWeight(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleWinsorisationWeight] = weight; +} + +//! Set the Winsorisation weight in \p weights to \p weight. MATHS_EXPORT -double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +void setWinsorisationWeight(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); -//! Extract the variance scale from a collection of weights. +//! Extract the Winsorisation weight from a collection of weights. +template +const VECTOR& winsorisationWeight(const TWeightsAry& weights) { + return weights[E_SampleWinsorisationWeight]; +} + +//! Check if a non-unit Winsorisation weight applies. MATHS_EXPORT -TDouble10Vec countVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +bool isWinsorised(const TDoubleWeightsAry& weights); -//! Check if a non-unit seasonal variance scale applies. +//! Check if a non-unit Winsorisation weight applies. +MATHS_EXPORT +bool isWinsorised(const TDoubleWeightsAry1Vec& weights); + +//! Check if a non-unit Winsorisation weight applies. +template +bool isWinsorised(const TWeightsAry& weights) { + return std::any_of(weights[E_SampleWinsorisationWeight].begin(), + weights[E_SampleWinsorisationWeight].end(), + [](double weight) { return weight != 1.0; }); +} + +//! Check if a non-unit Winsorisation weight applies. +template +bool isWinsorised(const core::CSmallVector, 1>& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TWeightsAry& weight) { + return isWinsorised(weight); + }); +} + +//! Get a weights array with seasonal variance scale \p weight. +template +TWeightsAry seasonalVarianceScaleWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleSeasonalVarianceScaleWeight] = weight; + return result; +} + +//! Get a weights vector with seasonal variance scale \p weight. +MATHS_EXPORT +TDoubleWeightsAry seasonalVarianceScaleWeight(double weight); + +//! Get a weights vector with seasonal variance scale \p weight. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +TDouble10VecWeightsAry seasonalVarianceScaleWeight(double weight, std::size_t dimension); + +//! 
Set the seasonal variance scale weight in \p weights to \p weight. +template +void setSeasonalVarianceScale(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleSeasonalVarianceScaleWeight] = weight; +} + +//! Set the seasonal variance scale weight in \p weights to \p weight. +MATHS_EXPORT +void setSeasonalVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); + +//! Extract the variance scale from a collection of weights. +template +const VECTOR& seasonalVarianceScale(const TWeightsAry& weights) { + return weights[E_SampleSeasonalVarianceScaleWeight]; +} //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights); +bool hasSeasonalVarianceScale(const TDoubleWeightsAry& weights); //! Check if a non-unit seasonal variance scale applies. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +bool hasSeasonalVarianceScale(const TDoubleWeightsAry1Vec& weights); //! Check if a non-unit seasonal variance scale applies. +template +bool hasSeasonalVarianceScale(const TWeightsAry& weights) { + return std::any_of(weights[E_SampleSeasonalVarianceScaleWeight].begin(), + weights[E_SampleSeasonalVarianceScaleWeight].end(), + [](double weight) { return weight != 1.0; }); +} + +//! Check if a non-unit seasonal variance scale applies. +template +bool hasSeasonalVarianceScale(const core::CSmallVector, 1>& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TWeightsAry& weight) { + return hasSeasonalVarianceScale(weight); + }); +} + +//! Get a weights array with count variance scale \p weight. +template +TWeightsAry countVarianceScaleWeight(const VECTOR& weight) { + TWeightsAry result(CUnitWeights::unit(weight)); + result[E_SampleCountVarianceScaleWeight] = weight; + return result; +} + +//! Get a weights vector with count variance scale \p weight. +MATHS_EXPORT +TDoubleWeightsAry countVarianceScaleWeight(double weight); + +//! Get a weights vector with count variance scale \p weight. +MATHS_EXPORT +TDouble10VecWeightsAry countVarianceScaleWeight(double weight, std::size_t dimension); + +//! Set the count variance scale weight in \p weights to \p weight. +template +void setCountVarianceScale(const VECTOR& weight, TWeightsAry& weights) { + weights[E_SampleCountVarianceScaleWeight] = weight; +} + +//! Set the count variance scale weight in \p weights to \p weight. MATHS_EXPORT -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights); +void setCountVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights); + +//! Multiply the count variance scale of \p weights by \p weight. +template +void multiplyCountVarianceScale(const VECTOR& weight, TWeightsAry& weights) { + maths_types_detail::SWeightArithmetic::multiply( + weight, weights[E_SampleCountVarianceScaleWeight]); +} + +//! Extract the variance scale from a collection of weights. +template +const VECTOR& countVarianceScale(const TWeightsAry& weights) { + return weights[E_SampleCountVarianceScaleWeight]; +} //! Check if a non-unit count variance scale applies. MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights); +bool hasCountVarianceScale(const TDoubleWeightsAry& weights); //! Check if a non-unit seasonal variance scale applies. 
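To show how several styles combine in one array, a short sketch (arbitrary weights, hypothetical function) using the setters and the predicate overloads declared in this header:

#include <maths/MathsTypes.h>

namespace {
// Build a weight that is simultaneously Winsorised, seasonally scaled and
// count-variance scaled, then check each slot with the matching predicate.
bool describeWeights() {
    using namespace ml;

    maths_t::TDoubleWeightsAry weights = maths_t::CUnitWeights::UNIT;
    maths_t::setWinsorisationWeight(0.2, weights);
    maths_t::setSeasonalVarianceScale(1.8, weights);
    maths_t::multiplyCountVarianceScale(2.0, weights);

    return maths_t::isWinsorised(weights) &&
           maths_t::hasSeasonalVarianceScale(weights) &&
           maths_t::hasCountVarianceScale(weights);
}
}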
MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec1Vec& weights); +bool hasCountVarianceScale(const TDoubleWeightsAry1Vec& weights); //! Check if a non-unit seasonal variance scale applies. -MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights); +template +bool hasCountVarianceScale(const TWeightsAry& weights) { + return std::any_of(weights[E_SampleCountVarianceScaleWeight].begin(), + weights[E_SampleCountVarianceScaleWeight].end(), + [](double weight) { return weight != 1.0; }); +} //! Check if a non-unit seasonal variance scale applies. -MATHS_EXPORT -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights); +template +bool hasCountVarianceScale(const core::CSmallVector, 1>& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TWeightsAry& weight) { + return hasCountVarianceScale(weight); + }); +} //! Enumerates the possible probability of less likely sample calculations. //! diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h index 899fe1cedd..e345276456 100644 --- a/include/model/CAnomalyDetectorModel.h +++ b/include/model/CAnomalyDetectorModel.h @@ -137,12 +137,8 @@ class MODEL_EXPORT CAnomalyDetectorModel : private core::CNonCopyable { using TSizeVec = std::vector; using TDoubleVec = std::vector; using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; using TDouble10Vec1Vec = core::CSmallVector; - using TDouble10Vec4Vec = core::CSmallVector; - using TDouble10Vec4Vec1Vec = core::CSmallVector; using TDouble1VecDoublePr = std::pair; using TDouble1VecDouble1VecPr = std::pair; using TSizeDoublePr = std::pair; diff --git a/include/model/CBucketQueue.h b/include/model/CBucketQueue.h index 01c87d2735..0663c8e8c1 100644 --- a/include/model/CBucketQueue.h +++ b/include/model/CBucketQueue.h @@ -213,9 +213,8 @@ class CBucketQueue { } } else if (traverser.name() == BUCKET_TAG) { if (i >= m_Queue.size()) { - LOG_WARN(<< "Bucket queue is smaller on restore than on " - "persist: " - << i << " >= " << m_Queue.size() + LOG_WARN(<< "Bucket queue is smaller on restore than on persist: " << i + << " >= " << m_Queue.size() << ". Extra buckets will be ignored."); // Restore into a temporary T dummy; @@ -256,9 +255,8 @@ class CBucketQueue { } } else if (traverser.name() == BUCKET_TAG) { if (i >= m_Queue.size()) { - LOG_WARN(<< "Bucket queue is smaller on restore than on " - "persist: " - << i << " >= " << m_Queue.size() + LOG_WARN(<< "Bucket queue is smaller on restore than on persist: " << i + << " >= " << m_Queue.size() << ". Extra buckets will be ignored."); if (traverser.hasSubLevel()) { // Restore into a temporary diff --git a/include/model/CIndividualModel.h b/include/model/CIndividualModel.h index 147de6800e..708772cd5c 100644 --- a/include/model/CIndividualModel.h +++ b/include/model/CIndividualModel.h @@ -262,7 +262,7 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { maths::CModel* model(model_t::EFeature feature, std::size_t pid); //! Sample the correlate models. - void sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles); + void sampleCorrelateModels(); //! Correct \p baseline with \p corrections for interim results. 
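Likewise, a sketch of the vector overloads of these predicates, which replace the old per-style checks over a batch of samples (hypothetical helper and weights):

#include <maths/MathsTypes.h>

namespace {
// Decide whether a batch of per-sample weights needs the variance-scaled
// code path; only the second sample carries a non-unit count variance scale.
bool batchNeedsVarianceScaling() {
    using namespace ml;

    maths_t::TDoubleWeightsAry1Vec weights;
    weights.push_back(maths_t::countWeight(1.0));
    weights.push_back(maths_t::countVarianceScaleWeight(4.0));
    return maths_t::hasCountVarianceScale(weights);
}
}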
void correctBaselineForInterim(model_t::EFeature feature, diff --git a/lib/maths/CConstantPrior.cc b/lib/maths/CConstantPrior.cc index eb0e4f4dd8..865d4b0ffb 100644 --- a/lib/maths/CConstantPrior.cc +++ b/lib/maths/CConstantPrior.cc @@ -75,7 +75,6 @@ bool CConstantPrior::acceptRestoreTraverser(core::CStateRestoreTraverser& traver core::CStringUtils::stringToType(traverser.value(), constant), m_Constant.reset(constant)) } while (traverser.next()); - return true; } @@ -95,9 +94,8 @@ bool CConstantPrior::needsOffset() const { return false; } -double CConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, - const TDouble1Vec& /*samples*/, - const TDouble4Vec1Vec& /*weights*/) { +double CConstantPrior::adjustOffset(const TDouble1Vec& /*samples*/, + const TDoubleWeightsAry1Vec& /*weights*/) { return 0.0; } @@ -105,9 +103,8 @@ double CConstantPrior::offset() const { return 0.0; } -void CConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& /*weights*/) { +void CConstantPrior::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& /*weights*/) { if (m_Constant || samples.empty()) { return; } @@ -118,44 +115,39 @@ void CConstantPrior::propagateForwardsByTime(double /*time*/) { } CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodSupport() const { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } double CConstantPrior::marginalLikelihoodMean() const { if (this->isNonInformative()) { return 0.0; } - return *m_Constant; } -double CConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CConstantPrior::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const { return this->marginalLikelihoodMean(); } CConstantPrior::TDoubleDoublePr CConstantPrior::marginalLikelihoodConfidenceInterval(double /*percentage*/, - const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { + const TDoubleWeightsAry& /*weights*/) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } - - return std::make_pair(*m_Constant, *m_Constant); + return {*m_Constant, *m_Constant}; } -double CConstantPrior::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CConstantPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& /*weights*/) const { return this->isNonInformative() ? 
boost::numeric::bounds::highest() : 0.0; } maths_t::EFloatingPointErrorStatus -CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CConstantPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { @@ -192,7 +184,7 @@ CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, return maths_t::E_FpOverflowed; } - numberSamples += maths_t::countForUpdate(weightStyles, weights[i]); + numberSamples += maths_t::countForUpdate(weights[i]); } result = numberSamples * core::constants::LOG_MAX_DOUBLE; @@ -202,19 +194,17 @@ CConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, void CConstantPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { samples.clear(); - if (this->isNonInformative()) { return; } - samples.resize(numberSamples, *m_Constant); } -bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CConstantPrior::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + lowerBound = upperBound = 0.0; if (samples.empty()) { @@ -225,7 +215,7 @@ bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, double numberSamples = 0.0; try { for (std::size_t i = 0u; i < samples.size(); ++i) { - numberSamples += maths_t::count(weightStyles, weights[i]); + numberSamples += maths_t::count(weights[i]); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute c.d.f. " << e.what()); @@ -251,11 +241,11 @@ bool CConstantPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, return true; } -bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CConstantPrior::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + lowerBound = upperBound = 0.0; if (samples.empty()) { @@ -266,7 +256,7 @@ bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightSty double numberSamples = 0.0; try { for (std::size_t i = 0u; i < samples.size(); ++i) { - numberSamples += maths_t::count(weightStyles, weights[i]); + numberSamples += maths_t::count(weights[i]); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute c.d.f. 
" << e.what()); @@ -293,12 +283,12 @@ bool CConstantPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightSty } bool CConstantPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation /*calculation*/, - const TWeightStyleVec& /*weightStyles*/, const TDouble1Vec& samples, - const TDouble4Vec1Vec& /*weights*/, + const TDoubleWeightsAry1Vec& /*weights*/, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { + lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; diff --git a/lib/maths/CGammaRateConjugate.cc b/lib/maths/CGammaRateConjugate.cc index 92bae9b962..a73066cb64 100644 --- a/lib/maths/CGammaRateConjugate.cc +++ b/lib/maths/CGammaRateConjugate.cc @@ -55,10 +55,8 @@ namespace { namespace detail { using TDoubleDoublePr = std::pair; -using TWeightStyleVec = maths_t::TWeightStyleVec; using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; @@ -321,9 +319,6 @@ struct SPlusWeight { //! (integrating over the prior for the gamma rate) and aggregate the //! results using \p aggregate. //! -//! \param[in] weightStyles Controls the interpretation of the weight(s) -//! that are associated with each sample. See maths_t::ESampleWeightStyle -//! for more details. //! \param[in] samples The weighted samples. //! \param[in] func The function to evaluate. //! \param[in] aggregate The function to aggregate the results of \p func. @@ -338,9 +333,8 @@ struct SPlusWeight { //! of the likelihood for \p samples. //! \param[out] result Filled in with the aggregation of results of \p func. template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, FUNC func, AGGREGATOR aggregate, bool isNonInformative, @@ -380,7 +374,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // everywhere. (It is acceptable to approximate all finite samples // as at the median of this distribution.) for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double x = samples[i] + offset; result = aggregate(result, func(CTools::SImproperDistribution(), x), n); } @@ -414,10 +408,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // gamma distributed and u is a constant offset. This means // that {x(i) + u} are gamma distributed. - double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double x = samples[i] + offset; LOG_TRACE(<< "x = " << x); @@ -442,10 +435,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // gamma distributed and u is a constant offset. This means // that {x(i) + u} are gamma distributed. 
- double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double x = samples[i] + offset; double scaledLikelihoodShape = likelihoodShape / varianceScale; double scaledPriorRate = varianceScale * priorRate; @@ -478,29 +470,26 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, template class CEvaluateOnSamples : core::CNonCopyable { public: - CEvaluateOnSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CEvaluateOnSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double offset, double likelihoodShape, double priorShape, double priorRate) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), - m_IsNonInformative(isNonInformative), m_Offset(offset), - m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape), - m_PriorRate(priorRate) {} + : m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), + m_Offset(offset), m_LikelihoodShape(likelihoodShape), + m_PriorShape(priorShape), m_PriorRate(priorRate) {} bool operator()(double x, double& result) const { return evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, + m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, m_Offset + x, m_LikelihoodShape, m_PriorShape, m_PriorRate, result); } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Offset; double m_LikelihoodShape; @@ -516,25 +505,24 @@ class CEvaluateOnSamples : core::CNonCopyable { class CProbabilityOfLessLikelySamples : core::CNonCopyable { public: CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double offset, double likelihoodShape, double priorShape, double priorRate) - : m_Calculation(calculation), m_WeightStyles(weightStyles), - m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), - m_Offset(offset), m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), m_PriorRate(priorRate), m_Tail(0) {} + : m_Calculation(calculation), m_Samples(samples), m_Weights(weights), + m_IsNonInformative(isNonInformative), m_Offset(offset), + m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape), + m_PriorRate(priorRate), m_Tail(0) {} bool operator()(double x, double& result) const { CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, + m_Samples, m_Weights, boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, @@ -553,9 +541,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { private: maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + 
const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Offset; double m_LikelihoodShape; @@ -583,22 +570,22 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { //! a and b are the prior gamma shape and rate, respectively. class CLogMarginalLikelihood : core::CNonCopyable { public: - CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double offset, double likelihoodShape, double priorShape, double priorRate) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), - m_Offset(offset), m_LikelihoodShape(likelihoodShape), - m_PriorShape(priorShape), m_PriorRate(priorRate), m_NumberSamples(0.0), - m_ImpliedShape(0.0), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { + : m_Samples(samples), m_Weights(weights), m_Offset(offset), + m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape), + m_PriorRate(priorRate), m_NumberSamples(0.0), m_ImpliedShape(0.0), + m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { this->precompute(); } //! Evaluate the log marginal likelihood at the offset \p x. bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { return false; } @@ -609,10 +596,9 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < m_Samples.size(); ++i) { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) * + maths_t::countVarianceScale(m_Weights[i]); double sample = m_Samples[i] + x + m_Offset; @@ -662,10 +648,9 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) * + maths_t::countVarianceScale(m_Weights[i]); m_NumberSamples += n; if (varianceScale != 1.0) { logVarianceScaleSum -= m_LikelihoodShape / varianceScale * @@ -699,9 +684,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; double m_Offset; double m_LikelihoodShape; double m_PriorShape; @@ -796,21 +780,19 @@ bool CGammaRateConjugate::needsOffset() const { return true; } -double CGammaRateConjugate::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double CGammaRateConjugate::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { COffsetCost cost(*this); CApplyOffset apply(*this); - return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply); + return this->adjustOffsetWithCost(samples, weights, cost, apply); } double CGammaRateConjugate::offset() const { return m_Offset; } -void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const 
TDouble4Vec1Vec& weights) { +void CGammaRateConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } @@ -822,8 +804,8 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); - this->CPrior::addSamples(weightStyles, samples, weights); + this->adjustOffset(samples, weights); + this->CPrior::addSamples(samples, weights); // We assume the data are described by X = Y - u where, Y is gamma // distributed and u is a constant offset. @@ -897,10 +879,9 @@ void CGammaRateConjugate::addSamples(const TWeightStyleVec& weightStyles, try { double shift = boost::math::digamma(m_LikelihoodShape); for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double x = samples[i] + m_Offset; if (!CMathsFuncs::isFinite(x) || x <= 0.0) { @@ -1007,22 +988,17 @@ void CGammaRateConjugate::propagateForwardsByTime(double time) { } CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodSupport() const { - return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); + return {-m_Offset, boost::numeric::bounds::highest()}; } double CGammaRateConjugate::marginalLikelihoodMean() const { return this->isInteger() ? this->mean() - 0.5 : this->mean(); } -double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } +double CGammaRateConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); if (!this->isNonInformative()) { // We use the fact that the marginal likelihood is the distribution @@ -1066,8 +1042,8 @@ double CGammaRateConjugate::marginalLikelihoodMode(const TWeightStyleVec& weight return std::max(mean == 0.0 ? 0.0 : mean - variance / mean, 0.0) - m_Offset; } -double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CGammaRateConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -1085,13 +1061,8 @@ double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& we // to show that Var(a' / B) = a'^2 * E[ 1.0 / B^2 - (b / (a - 1))^2] // whence... 
- double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double a = this->priorShape(); if (a <= 2.0) { return varianceScale * CBasicStatistics::variance(m_SampleMoments); @@ -1103,8 +1074,7 @@ double CGammaRateConjugate::marginalLikelihoodVariance(const TWeightStyleVec& we CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -1123,8 +1093,8 @@ CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, // and beta equal to m_PriorShape. try { - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double scaledLikelihoodShape = m_LikelihoodShape / varianceScale; double scaledPriorRate = varianceScale * this->priorRate(); boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape()); @@ -1137,18 +1107,17 @@ CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage, (this->isInteger() ? 0.5 : 0.0); } LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(x1, x2); + return {x1, x2}; } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); + LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } return this->marginalLikelihoodSupport(); } maths_t::EFloatingPointErrorStatus -CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CGammaRateConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -1180,8 +1149,8 @@ CGammaRateConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightSty maths_t::EFloatingPointErrorStatus status = maths_t::E_FpFailed; try { detail::CLogMarginalLikelihood logMarginalLikelihood( - weightStyles, samples, weights, m_Offset, m_LikelihoodShape, - this->priorShape(), this->priorRate()); + samples, weights, m_Offset, m_LikelihoodShape, this->priorShape(), + this->priorRate()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation // w.r.t.
to the hidden offset of the samples Z, which is uniform @@ -1338,18 +1307,17 @@ void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples, } } -bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CGammaRateConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdf = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf(weightStyles, samples, weights, - this->isNonInformative(), m_Offset, m_LikelihoodShape, - this->priorShape(), this->priorRate()); + TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_Offset, + m_LikelihoodShape, this->priorShape(), this->priorRate()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1378,18 +1346,18 @@ bool CGammaRateConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, return true; } -bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CGammaRateConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdfComplement = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; TMinusLogCdfComplement minusLogCdfComplement( - weightStyles, samples, weights, this->isNonInformative(), m_Offset, - m_LikelihoodShape, this->priorShape(), this->priorRate()); + samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape, + this->priorShape(), this->priorRate()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1419,9 +1387,8 @@ bool CGammaRateConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weig } bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { @@ -1429,8 +1396,8 @@ bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCa tail = maths_t::E_UndeterminedTail; detail::CProbabilityOfLessLikelySamples probability( - calculation, weightStyles, samples, weights, this->isNonInformative(), - m_Offset, m_LikelihoodShape, this->priorShape(), this->priorRate()); + calculation, samples, weights, this->isNonInformative(), m_Offset, + m_LikelihoodShape, this->priorShape(), this->priorRate()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1469,6 +1436,7 @@ bool CGammaRateConjugate::isNonInformative() const { } void CGammaRateConjugate::print(const std::string& indent, std::string& result) const { + result += core_t::LINE_ENDING + indent + "gamma "; if (this->isNonInformative()) { result += "non-informative"; @@ -1495,6 +1463,7 @@ void CGammaRateConjugate::print(const std::string& indent, std::string& result) } std::string CGammaRateConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative likelihood is improper 0 everywhere. 
return EMPTY_STRING; @@ -1571,6 +1540,7 @@ double CGammaRateConjugate::likelihoodShape() const { } double CGammaRateConjugate::likelihoodRate() const { + if (this->isNonInformative()) { return 0.0; } @@ -1588,9 +1558,10 @@ double CGammaRateConjugate::likelihoodRate() const { CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::confidenceIntervalRate(double percentage) const { + if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } percentage /= 100.0; @@ -1600,16 +1571,16 @@ CGammaRateConjugate::confidenceIntervalRate(double percentage) const { try { // The prior distribution for the rate is gamma. boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate()); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), - boost::math::quantile(gamma, upperPercentile)); + return {boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what() << ", prior shape = " << this->priorShape() << ", prior rate = " << this->priorRate()); } - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs, @@ -1623,6 +1594,7 @@ bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs, } double CGammaRateConjugate::mean() const { + if (this->isNonInformative()) { return CBasicStatistics::mean(m_SampleMoments); } diff --git a/lib/maths/CKMeansOnline1d.cc b/lib/maths/CKMeansOnline1d.cc index bec8af1436..d6301ed225 100644 --- a/lib/maths/CKMeansOnline1d.cc +++ b/lib/maths/CKMeansOnline1d.cc @@ -39,11 +39,10 @@ namespace maths { namespace { -using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; +using TDouble1Vec = core::CSmallVector; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; namespace detail { @@ -67,7 +66,7 @@ double logLikelihoodFromCluster(const TDouble1Vec& sample, const CNormalMeanPrecConjugate& normal) { double likelihood; maths_t::EFloatingPointErrorStatus status = normal.jointLogMarginalLikelihood( - CConstantWeights::COUNT, sample, CConstantWeights::SINGLE_UNIT, likelihood); + sample, maths_t::CUnitWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute probability for: " << sample[0]); return core::constants::LOG_MIN_DOUBLE - 1.0; @@ -168,6 +167,7 @@ bool CKMeansOnline1d::clusterSpread(std::size_t index, double& result) const { } void CKMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, double count) const { + result.clear(); if (m_Clusters.empty()) { @@ -210,6 +210,7 @@ void CKMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do } void CKMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) { + clusters.clear(); if (m_Clusters.empty()) { @@ -219,11 +220,10 @@ void CKMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub this->cluster(point, clusters, count); TDouble1Vec sample{point}; - TDouble4Vec1Vec weight{TDouble4Vec(1)}; - for (std::size_t i = 0u; i < clusters.size(); 
++i) { - weight[0][0] = clusters[i].second; - m_Clusters[clusters[i].first].addSamples(CConstantWeights::COUNT, sample, weight); + for (const auto& cluster : clusters) { + m_Clusters[cluster.first].addSamples( + sample, {maths_t::countWeight(cluster.second)}); } } diff --git a/lib/maths/CLogNormalMeanPrecConjugate.cc b/lib/maths/CLogNormalMeanPrecConjugate.cc index 5f0c404837..919d4ded87 100644 --- a/lib/maths/CLogNormalMeanPrecConjugate.cc +++ b/lib/maths/CLogNormalMeanPrecConjugate.cc @@ -54,13 +54,11 @@ namespace maths { namespace { -using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TSizeVec = std::vector; +using TDouble1Vec = core::CSmallVector; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar::TAccumulator; -using TWeightStyleVec = maths_t::TWeightStyleVec; //! Compute x * x. inline double pow2(double x) { @@ -110,9 +108,6 @@ inline void locationAndScale(double vs, //! (integrating over the prior for the exponentiated normal mean and //! precision) and aggregate the results using \p aggregate. //! -//! \param weightStyles Controls the interpretation of weights that are -//! associated with each sample. See maths_t::ESampleWeightStyle for more -//! details. //! \param samples The weighted samples. //! \param weights The weights of each sample in \p samples. //! \param func The function to evaluate. @@ -127,9 +122,8 @@ inline void locationAndScale(double vs, //! \param precision The precision of the conditional mean prior. //! \param result Filled in with the aggregation of results of \p func. template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, FUNC func, AGGREGATOR aggregate, bool isNonInformative, @@ -165,7 +159,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // (It is acceptable to approximate all finite samples as at the median // of this distribution.) 
for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); result = aggregate( result, func(CTools::SImproperDistribution(), samples[i] + offset), n); } @@ -191,15 +185,14 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double s = std::exp(-r); for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double location; double scale; locationAndScale(varianceScale, r, s, mean, precision, rate, shape, location, scale); - boost::math::lognormal_distribution<> lognormal(location, scale); + boost::math::lognormal lognormal(location, scale); result = aggregate(result, func(lognormal, samples[i] + offset), n); } } else { @@ -210,10 +203,9 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double s = std::exp(-r); for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); double location; double scale; locationAndScale(varianceScale, r, s, mean, precision, rate, @@ -241,29 +233,27 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, template class CEvaluateOnSamples : core::CNonCopyable { public: - CEvaluateOnSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CEvaluateOnSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double offset, double mean, double precision, double shape, double rate) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + : m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate) {} bool operator()(double x, double& result) const { return evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, + m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, m_Offset + x, m_Shape, m_Rate, m_Mean, m_Precision, result); } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Offset; double m_Mean; @@ -325,7 +315,7 @@ class CVarianceKernel { bool operator()(const TValue& x, TValue& result) const { try { boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B); - boost::math::normal_distribution<> normal(m_M, std::sqrt(1.0 / x(0) / m_P)); + boost::math::normal normal(m_M, std::sqrt(1.0 / x(0) / m_P)); double fx = boost::math::pdf(normal, x(1)) * boost::math::pdf(gamma, x(0)); double m = std::exp(x(1) + 0.5 / x(0)); result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + pow2(m - m_Mean)) * fx; @@ -352,26 +342,25 @@ class CVarianceKernel { class CProbabilityOfLessLikelySamples : core::CNonCopyable { public: 
CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double offset, double mean, double precision, double shape, double rate) - : m_Calculation(calculation), m_WeightStyles(weightStyles), - m_Samples(samples), m_Weights(weights), + : m_Calculation(calculation), m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate), m_Tail(0) {} bool operator()(double x, double& result) const { + CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, + m_Samples, m_Weights, boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, @@ -392,9 +381,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { private: maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Offset; double m_Mean; @@ -431,23 +419,22 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { //! a and b are the prior Gamma shape and rate, respectively. class CLogMarginalLikelihood : core::CNonCopyable { public: - CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double offset, double mean, double precision, double shape, double rate) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), - m_Offset(offset), m_Mean(mean), m_Precision(precision), - m_Shape(shape), m_Rate(rate), m_NumberSamples(0.0), m_Scales(), - m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { + : m_Samples(samples), m_Weights(weights), m_Offset(offset), m_Mean(mean), + m_Precision(precision), m_Shape(shape), m_Rate(rate), m_NumberSamples(0.0), + m_Scales(), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { this->precompute(); } //! Evaluate the log marginal likelihood at the offset \p x. 
bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { return false; } @@ -457,7 +444,7 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < m_Samples.size(); ++i) { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); double sample = m_Samples[i] + m_Offset + x; if (sample <= 0.0) { // Technically, the marginal likelihood is zero here @@ -517,15 +504,14 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { double logVarianceScaleSum = 0.0; - if (maths_t::hasSeasonalVarianceScale(m_WeightStyles, m_Weights) || - maths_t::hasCountVarianceScale(m_WeightStyles, m_Weights)) { + if (maths_t::hasSeasonalVarianceScale(m_Weights) || + maths_t::hasCountVarianceScale(m_Weights)) { m_Scales.reserve(m_Weights.size()); double r = m_Rate / m_Shape; double s = std::exp(-r); for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - double varianceScale = - maths_t::seasonalVarianceScale(m_WeightStyles, m_Weights[i]) * - maths_t::countVarianceScale(m_WeightStyles, m_Weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) * + maths_t::countVarianceScale(m_Weights[i]); // Get the scale and shift of the exponentiated Gaussian. if (varianceScale == 1.0) { @@ -542,7 +528,7 @@ class CLogMarginalLikelihood : core::CNonCopyable { double weightedNumberSamples = 0.0; for (std::size_t i = 0u; i < m_Weights.size(); ++i) { - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); m_NumberSamples += n; weightedNumberSamples += n / (m_Scales.empty() ? 1.0 : m_Scales[i].first); @@ -567,9 +553,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; double m_Offset; double m_Mean; double m_Precision; @@ -595,12 +580,10 @@ const double CLogMarginalLikelihood::LOG_2_PI = //! [n, n+1]. 
class CLogSampleSquareDeviation : core::CNonCopyable { public: - CLogSampleSquareDeviation(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogSampleSquareDeviation(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double mean) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), - m_Mean(mean) {} + : m_Samples(samples), m_Weights(weights), m_Mean(mean) {} bool operator()(double x, double& result) const { result = 0.0; @@ -609,7 +592,7 @@ class CLogSampleSquareDeviation : core::CNonCopyable { if (residual <= 0.0) { continue; } - double n = maths_t::countForUpdate(m_WeightStyles, m_Weights[i]); + double n = maths_t::countForUpdate(m_Weights[i]); residual = std::log(residual + x) - m_Mean; result += n * pow2(residual); } @@ -617,9 +600,8 @@ class CLogSampleSquareDeviation : core::CNonCopyable { } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; double m_Mean; }; @@ -711,25 +693,22 @@ bool CLogNormalMeanPrecConjugate::needsOffset() const { return true; } -double CLogNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double CLogNormalMeanPrecConjugate::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { COffsetCost cost(*this); CApplyOffset apply(*this); - return this->adjustOffsetWithCost(weightStyles, samples, weights, cost, apply); + return this->adjustOffsetWithCost(samples, weights, cost, apply); } double CLogNormalMeanPrecConjugate::offset() const { return m_Offset; } -void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -737,8 +716,8 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles return; } - this->adjustOffset(weightStyles, samples, weights); - this->CPrior::addSamples(weightStyles, samples, weights); + this->adjustOffset(samples, weights); + this->CPrior::addSamples(samples, weights); // We assume the data are described by X = exp(Y) - u where, Y is normally // distributed and u is a constant offset. @@ -803,78 +782,69 @@ void CLogNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); - try { - if (this->isInteger()) { - // Filled in with samples rescaled to have approximately unit - // variance scale. - TDouble1Vec scaledSamples; - scaledSamples.resize(samples.size(), 1.0); - - TMeanAccumulator logSamplesMean_; - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); - double x = samples[i] + m_Offset; - numberSamples += n; - double t = varianceScale == 1.0 - ? r - : r + std::log(s + varianceScale * (1.0 - s)); - double shift = (r - t) / 2.0; - double scale = r == t ? 
1.0 : t / r; - scaledSamples[i] = scale; - double logxInvPlus1 = std::log(1.0 / x + 1.0); - double logxPlus1 = std::log(x + 1.0); - logSamplesMean_.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift, n / scale); - } - scaledNumberSamples = CBasicStatistics::count(logSamplesMean_); - logSamplesMean = CBasicStatistics::mean(logSamplesMean_); - - double mean = (m_GaussianPrecision * m_GaussianMean + - scaledNumberSamples * logSamplesMean) / - (m_GaussianPrecision + scaledNumberSamples); - for (std::size_t i = 0u; i < scaledSamples.size(); ++i) { - double scale = scaledSamples[i]; - scaledSamples[i] = - scale == 1.0 - ? samples[i] + m_Offset - : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / - std::sqrt(scale)); - } + if (this->isInteger()) { + // Filled in with samples rescaled to have approximately unit + // variance scale. + TDouble1Vec scaledSamples; + scaledSamples.resize(samples.size(), 1.0); + + TMeanAccumulator logSamplesMean_; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); + double x = samples[i] + m_Offset; + numberSamples += n; + double t = varianceScale == 1.0 + ? r + : r + std::log(s + varianceScale * (1.0 - s)); + double shift = (r - t) / 2.0; + double scale = r == t ? 1.0 : t / r; + scaledSamples[i] = scale; + double logxInvPlus1 = std::log(1.0 / x + 1.0); + double logxPlus1 = std::log(x + 1.0); + logSamplesMean_.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift, n / scale); + } + scaledNumberSamples = CBasicStatistics::count(logSamplesMean_); + logSamplesMean = CBasicStatistics::mean(logSamplesMean_); + + double mean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) / + (m_GaussianPrecision + scaledNumberSamples); + for (std::size_t i = 0u; i < scaledSamples.size(); ++i) { + double scale = scaledSamples[i]; + scaledSamples[i] = + scale == 1.0 ? samples[i] + m_Offset + : std::exp(mean + (std::log(samples[i] + m_Offset) - mean) / + std::sqrt(scale)); + } - detail::CLogSampleSquareDeviation deviationFunction( - weightStyles, scaledSamples, weights, logSamplesMean); - CIntegration::gaussLegendre( - deviationFunction, 0.0, 1.0, logSamplesSquareDeviation); - } else { - TMeanVarAccumulator logSamplesMoments; - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = - maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); - double x = samples[i] + m_Offset; - if (x <= 0.0) { - LOG_ERROR(<< "Discarding " << x << " it's not log-normal"); - continue; - } - numberSamples += n; - double t = varianceScale == 1.0 - ? r - : r + std::log(s + varianceScale * (1.0 - s)); - double scale = r == t ? 
1.0 : t / r; - double shift = (r - t) / 2.0; - logSamplesMoments.add(std::log(x) - shift, n / scale); + detail::CLogSampleSquareDeviation deviationFunction(scaledSamples, weights, + logSamplesMean); + CIntegration::gaussLegendre( + deviationFunction, 0.0, 1.0, logSamplesSquareDeviation); + } else { + TMeanVarAccumulator logSamplesMoments; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); + double x = samples[i] + m_Offset; + if (x <= 0.0) { + LOG_ERROR(<< "Discarding " << x << " it's not log-normal"); + continue; } - scaledNumberSamples = CBasicStatistics::count(logSamplesMoments); - logSamplesMean = CBasicStatistics::mean(logSamplesMoments); - logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * - CBasicStatistics::variance(logSamplesMoments); + numberSamples += n; + double t = varianceScale == 1.0 + ? r + : r + std::log(s + varianceScale * (1.0 - s)); + double scale = r == t ? 1.0 : t / r; + double shift = (r - t) / 2.0; + logSamplesMoments.add(std::log(x) - shift, n / scale); } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + scaledNumberSamples = CBasicStatistics::count(logSamplesMoments); + logSamplesMean = CBasicStatistics::mean(logSamplesMoments); + logSamplesSquareDeviation = (scaledNumberSamples - 1.0) * + CBasicStatistics::variance(logSamplesMoments); } m_GammaShape += 0.5 * numberSamples; @@ -942,7 +912,6 @@ void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. return; @@ -979,15 +948,15 @@ void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) { CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const { - return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); + return {-m_Offset, boost::numeric::bounds::highest()}; } double CLogNormalMeanPrecConjugate::marginalLikelihoodMean() const { return this->isInteger() ? this->mean() - 0.5 : this->mean(); } -double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return std::exp(m_GaussianMean) - m_Offset; } @@ -996,13 +965,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec // is log-normally distributed and for small precision it is log-t. // See evaluateFunctionOnJointDistribution for more discussion. 
- double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); try { double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); @@ -1012,7 +976,7 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec m_GammaRate, m_GammaShape, location, scale); LOG_TRACE(<< "location = " << location << ", scale = " << scale); if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) { - boost::math::lognormal_distribution<> logNormal(location, scale); + boost::math::lognormal logNormal(location, scale); return boost::math::mode(logNormal) - m_Offset; } CLogTDistribution logt(2.0 * m_GammaShape, location, scale); @@ -1031,8 +995,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec return (normalPrecision == 0.0 ? 0.0 : std::exp(normalMean - 1.0 / normalPrecision)) - m_Offset; } -double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -1057,13 +1021,8 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl // // Note that b / a > 0 so this is necessarily non-negative. - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double vh = std::exp(2.0 * m_GaussianMean + m_GammaRate / m_GammaShape * (2.0 / m_GaussianPrecision + 1.0)) * (std::exp(m_GammaRate / m_GammaShape) - 1.0); @@ -1078,8 +1037,7 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl a[0] = boost::math::quantile(gamma, 0.03); b[0] = boost::math::quantile(gamma, 0.97); - boost::math::normal_distribution<> normal( - m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision); + boost::math::normal normal(m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision); a[1] = boost::math::quantile(normal, 0.03); b[1] = boost::math::quantile(normal, 0.97); @@ -1100,8 +1058,7 @@ double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyl CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -1112,8 +1069,8 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent // We use the fact that the marginal likelihood is a log-t distribution. 
try { - double varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double r = m_GammaRate / m_GammaShape; double s = std::exp(-r); @@ -1124,7 +1081,7 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent LOG_TRACE(<< "location = " << location << ", scale = " << scale); if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) { - boost::math::lognormal_distribution<> logNormal(location, scale); + boost::math::lognormal logNormal(location, scale); double x1 = boost::math::quantile(logNormal, (1.0 - percentage) / 2.0) - m_Offset - (this->isInteger() ? 0.5 : 0.0); double x2 = percentage > 0.0 @@ -1132,7 +1089,7 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(x1, x2); + return {x1, x2}; } CLogTDistribution logt(2.0 * m_GammaShape, location, scale); double x1 = quantile(logt, (1.0 - percentage) / 2.0) - m_Offset - @@ -1141,7 +1098,7 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent m_Offset - (this->isInteger() ? 0.5 : 0.0) : x1; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(x1, x2); + return {x1, x2}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } @@ -1150,9 +1107,8 @@ CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percent } maths_t::EFloatingPointErrorStatus -CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -1160,7 +1116,6 @@ CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& w LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -1182,8 +1137,8 @@ CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& w } detail::CLogMarginalLikelihood logMarginalLikelihood( - weightStyles, samples, weights, m_Offset, m_GaussianMean, - m_GaussianPrecision, m_GammaShape, m_GammaRate); + samples, weights, m_Offset, m_GaussianMean, m_GaussianPrecision, + m_GammaShape, m_GammaRate); if (this->isInteger()) { CIntegration::logGaussLegendre( logMarginalLikelihood, 0.0, 1.0, result); @@ -1212,7 +1167,6 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam if (numberSamples == 0 || this->numberSamples() == 0.0) { return; } - if (this->isNonInformative()) { // We can't sample the marginal likelihood directly. This should // only happen if we've had one sample so just return that sample. 
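[Note for reviewers, not part of the patch] The hunks above and below replace the separate (weightStyles, weights) arguments with a single per-sample maths_t::TDoubleWeightsAry. A minimal sketch of the resulting call pattern, using only helpers that appear elsewhere in this diff; the prior object, the samples vector, the 2.0 count and the 0.9 seasonal scale are invented for illustration:

    // One fixed-size weights array per sample; every weight kind has a slot,
    // so there is no style vector to keep in sync with the weights any more.
    maths_t::TDoubleWeightsAry1Vec weights{maths_t::countWeight(2.0)};
    maths_t::setSeasonalVarianceScale(0.9, weights[0]);

    prior.addSamples(samples, weights);

    double logLikelihood;
    prior.jointLogMarginalLikelihood(samples, weights, logLikelihood);

    // Unweighted calls pass the shared unit weights.
    prior.jointLogMarginalLikelihood(samples, maths_t::CUnitWeights::SINGLE_UNIT, logLikelihood);

Accessors are correspondingly single-argument, e.g. maths_t::count(weights[0]) and maths_t::seasonalVarianceScale(weights[0]).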
@@ -1260,7 +1214,7 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape); try { - boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale); + boost::math::lognormal lognormal(m_GaussianMean, scale); double mean = boost::math::mean(lognormal); @@ -1316,17 +1270,16 @@ void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSam } } -bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdf = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf(weightStyles, samples, weights, - this->isNonInformative(), m_Offset, m_GaussianMean, + TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { @@ -1356,18 +1309,18 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weight return true; } -bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdfComplement = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; TMinusLogCdfComplement minusLogCdfComplement( - weightStyles, samples, weights, this->isNonInformative(), m_Offset, - m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1398,18 +1351,18 @@ bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleV bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples( maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { + lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; detail::CProbabilityOfLessLikelySamples probability( - calculation, weightStyles, samples, weights, this->isNonInformative(), - m_Offset, m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); + calculation, samples, weights, this->isNonInformative(), m_Offset, + m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1447,6 +1400,7 @@ bool CLogNormalMeanPrecConjugate::isNonInformative() const { } void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& result) const { + result += core_t::LINE_ENDING + indent + "log-normal "; if (this->isNonInformative()) { result += "non-informative"; @@ -1456,7 +1410,7 @@ void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * 
m_GammaRate / m_GammaShape); try { - boost::math::lognormal_distribution<> lognormal(m_GaussianMean, scale); + boost::math::lognormal lognormal(m_GaussianMean, scale); double mean = boost::math::mean(lognormal); double deviation = boost::math::standard_deviation(lognormal); result += "mean = " + core::CStringUtils::typeToStringPretty(mean - m_Offset) + @@ -1467,6 +1421,7 @@ void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& } std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper and effectively 0 everywhere. return std::string(); @@ -1480,7 +1435,7 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); double precision = m_GaussianPrecision * this->normalPrecision(); - boost::math::normal_distribution<> gaussian(m_GaussianMean, 1.0 / std::sqrt(precision)); + boost::math::normal gaussian(m_GaussianMean, 1.0 / std::sqrt(precision)); double xStart = boost::math::quantile(gamma, (1.0 - RANGE) / 2.0); double xEnd = boost::math::quantile(gamma, (1.0 + RANGE) / 2.0); @@ -1510,7 +1465,7 @@ std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const { y = yStart; for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) { double conditionalPrecision = m_GaussianPrecision * x; - boost::math::normal_distribution<> conditionalGaussian( + boost::math::normal conditionalGaussian( m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) @@ -1561,6 +1516,7 @@ double CLogNormalMeanPrecConjugate::normalMean() const { } double CLogNormalMeanPrecConjugate::normalPrecision() const { + if (this->isNonInformative()) { return 0.0; } @@ -1578,9 +1534,10 @@ double CLogNormalMeanPrecConjugate::normalPrecision() const { CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const { + if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } // Compute the symmetric confidence interval around the median of the @@ -1605,7 +1562,7 @@ CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) con double lowerPercentile = 0.5 * (1.0 - percentage); double upperPercentile = 0.5 * (1.0 + percentage); - boost::math::students_t_distribution<> students(2.0 * m_GammaShape); + boost::math::students_t students(2.0 * m_GammaShape); double xLower = boost::math::quantile(students, lowerPercentile); double xUpper = boost::math::quantile(students, upperPercentile); @@ -1615,14 +1572,15 @@ CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) con xLower = m_GaussianMean + xLower / std::sqrt(precision); xUpper = m_GaussianMean + xUpper / std::sqrt(precision); - return std::make_pair(xLower, xUpper); + return {xLower, xUpper}; } CLogNormalMeanPrecConjugate::TDoubleDoublePr CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const { + if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } percentage /= 100.0; @@ -1632,8 +1590,8 @@ 
CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage // The marginal prior distribution for the precision is gamma. boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), - boost::math::quantile(gamma, upperPercentile)); + return {boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)}; } bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate& rhs, @@ -1647,6 +1605,7 @@ bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjuga } double CLogNormalMeanPrecConjugate::mean() const { + if (this->isNonInformative()) { return std::exp(m_GaussianMean) - m_Offset; } diff --git a/lib/maths/CModel.cc b/lib/maths/CModel.cc index 9834dc3517..a9f89ed3f9 100644 --- a/lib/maths/CModel.cc +++ b/lib/maths/CModel.cc @@ -112,8 +112,7 @@ double CModelParams::probabilityBucketEmpty() const { CModelAddSamplesParams::CModelAddSamplesParams() : m_Type(maths_t::E_MixedData), m_IsNonNegative(false), - m_PropagationInterval(1.0), m_WeightStyles(nullptr), - m_TrendWeights(nullptr), m_PriorWeights(nullptr) { + m_PropagationInterval(1.0), m_TrendWeights(nullptr), m_PriorWeights(nullptr) { } CModelAddSamplesParams& CModelAddSamplesParams::integer(bool integer) { @@ -144,36 +143,30 @@ double CModelAddSamplesParams::propagationInterval() const { } CModelAddSamplesParams& -CModelAddSamplesParams::weightStyles(const maths_t::TWeightStyleVec& styles) { - m_WeightStyles = &styles; - return *this; -} - -const maths_t::TWeightStyleVec& CModelAddSamplesParams::weightStyles() const { - return *m_WeightStyles; -} - -CModelAddSamplesParams& CModelAddSamplesParams::trendWeights(const TDouble2Vec4VecVec& weights) { +CModelAddSamplesParams::trendWeights(const TDouble2VecWeightsAryVec& weights) { m_TrendWeights = &weights; return *this; } -const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::trendWeights() const { +const CModelAddSamplesParams::TDouble2VecWeightsAryVec& +CModelAddSamplesParams::trendWeights() const { return *m_TrendWeights; } -CModelAddSamplesParams& CModelAddSamplesParams::priorWeights(const TDouble2Vec4VecVec& weights) { +CModelAddSamplesParams& +CModelAddSamplesParams::priorWeights(const TDouble2VecWeightsAryVec& weights) { m_PriorWeights = &weights; return *this; } -const CModelAddSamplesParams::TDouble2Vec4VecVec& CModelAddSamplesParams::priorWeights() const { +const CModelAddSamplesParams::TDouble2VecWeightsAryVec& +CModelAddSamplesParams::priorWeights() const { return *m_PriorWeights; } CModelProbabilityParams::CModelProbabilityParams() : m_Tag(0), m_SeasonalConfidenceInterval(DEFAULT_SEASONAL_CONFIDENCE_INTERVAL), - m_WeightStyles(nullptr), m_UpdateAnomalyModel(true) { + m_UpdateAnomalyModel(true) { } CModelProbabilityParams& CModelProbabilityParams::tag(std::size_t tag) { @@ -218,30 +211,23 @@ const CModelProbabilityParams::TBool2Vec1Vec& CModelProbabilityParams::bucketEmp } CModelProbabilityParams& -CModelProbabilityParams::weightStyles(const maths_t::TWeightStyleVec& styles) { - m_WeightStyles = &styles; - return *this; -} - -const maths_t::TWeightStyleVec& CModelProbabilityParams::weightStyles() const { - return *m_WeightStyles; -} - -CModelProbabilityParams& CModelProbabilityParams::addWeights(const TDouble2Vec4Vec& weights) { +CModelProbabilityParams::addWeights(const TDouble2VecWeightsAry& weights) { m_Weights.push_back(weights); return *this; } -CModelProbabilityParams& 
CModelProbabilityParams::weights(const TDouble2Vec4Vec1Vec& weights) { +CModelProbabilityParams& +CModelProbabilityParams::weights(const TDouble2VecWeightsAry1Vec& weights) { m_Weights = weights; return *this; } -const CModelProbabilityParams::TDouble2Vec4Vec1Vec& CModelProbabilityParams::weights() const { +const CModelProbabilityParams::TDouble2VecWeightsAry1Vec& +CModelProbabilityParams::weights() const { return m_Weights; } -CModelProbabilityParams::TDouble2Vec4Vec1Vec& CModelProbabilityParams::weights() { +CModelProbabilityParams::TDouble2VecWeightsAry1Vec& CModelProbabilityParams::weights() { return m_Weights; } @@ -365,21 +351,18 @@ CModelStub::TSize2Vec1Vec CModelStub::correlates() const { } CModelStub::TDouble2Vec CModelStub::mode(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { + const TDouble2VecWeightsAry& /*weights*/) const { return TDouble2Vec(); } CModelStub::TDouble2Vec1Vec CModelStub::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec1Vec& /*weights*/) const { + const TDouble2VecWeightsAry1Vec& /*weights*/) const { return TDouble2Vec1Vec(); } CModelStub::TDouble2Vec1Vec -CModelStub::residualModes(const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { +CModelStub::residualModes(const TDouble2VecWeightsAry& /*weights*/) const { return TDouble2Vec1Vec(); } @@ -408,8 +391,7 @@ CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/, CModelStub::TDouble2Vec3Vec CModelStub::confidenceInterval(core_t::TTime /*time*/, double /*confidenceInterval*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec& /*weights*/) const { + const TDouble2VecWeightsAry& /*weights*/) const { return TDouble2Vec3Vec(); } diff --git a/lib/maths/CMultimodalPrior.cc b/lib/maths/CMultimodalPrior.cc index 8e2bbb68b5..65d6a1d6b8 100644 --- a/lib/maths/CMultimodalPrior.cc +++ b/lib/maths/CMultimodalPrior.cc @@ -240,9 +240,8 @@ bool CMultimodalPrior::needsOffset() const { return false; } -double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double CMultimodalPrior::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { double result = 0.0; if (this->needsOffset()) { @@ -253,9 +252,7 @@ double CMultimodalPrior::adjustOffset(const TWeightStyleVec& weightStyles, auto j = std::find_if(m_Modes.begin(), m_Modes.end(), CSetTools::CIndexInSet(cluster.first)); if (j != m_Modes.end()) { - result += j->s_Prior->adjustOffset( - weightStyles, TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, weights[i])); + result += j->s_Prior->adjustOffset({samples[i]}, {weights[i]}); } } } @@ -272,13 +269,11 @@ double CMultimodalPrior::offset() const { return offset; } -void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CMultimodalPrior::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -286,7 +281,7 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, return; } - this->adjustOffset(weightStyles_, samples, weights); + this->adjustOffset(samples, weights); // This uses a clustering methodology 
(defined by m_Clusterer) // to assign each sample to a cluster. Each cluster has its own @@ -310,31 +305,14 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, // Declared outside the loop to minimize the number of times it // is initialized. - TWeightStyleVec weightStyles(weightStyles_); TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1); + TDoubleWeightsAry1Vec weight(1); TSizeDoublePr2Vec clusters; - std::size_t indices[maths_t::NUMBER_WEIGHT_STYLES]; - std::size_t missing = weightStyles.size() + 1; - std::fill_n(indices, maths_t::NUMBER_WEIGHT_STYLES, missing); - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - indices[weightStyles[i]] = i; - } - std::size_t seasonal = indices[maths_t::E_SampleSeasonalVarianceScaleWeight]; - std::size_t count = indices[maths_t::E_SampleCountWeight]; - std::size_t winsorisation = indices[maths_t::E_SampleWinsorisationWeight]; - if (count == missing) { - count = weightStyles.size(); - weightStyles.push_back(maths_t::E_SampleCountWeight); - } - try { - bool hasSeasonalScale = !this->isNonInformative() && seasonal != missing; - double mean = (!this->isNonInformative() && - maths_t::hasSeasonalVarianceScale(weightStyles_, weights)) - ? this->marginalLikelihoodMean() - : 0.0; + bool hasSeasonalScale = !this->isNonInformative() && + maths_t::hasSeasonalVarianceScale(weights); + double mean = hasSeasonalScale ? this->marginalLikelihoodMean() : 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; @@ -343,22 +321,22 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, continue; } if (hasSeasonalScale) { - x = mean + (x - mean) / std::sqrt(weights[i][seasonal]); + x = mean + (x - mean) / + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); } sample[0] = x; weight[0] = weights[i]; - weight[0].resize(weightStyles.size(), 1.0); - if (seasonal != missing) { - weight[0][seasonal] = 1.0; - } + maths_t::setSeasonalVarianceScale(1.0, weight[0]); clusters.clear(); - m_Clusterer->add(x, clusters, weight[0][count]); + m_Clusterer->add(x, clusters, maths_t::count(weight[0])); - double Z = std::accumulate( - m_Modes.begin(), m_Modes.end(), weight[0][count], - [](double sum, const TMode& mode) { return sum + mode.weight(); }); + auto addModeWeight = [](double sum, const TMode& mode) { + return sum + mode.weight(); + }; + double Z = std::accumulate(m_Modes.begin(), m_Modes.end(), + maths_t::count(weight[0]), addModeWeight); double n = 0.0; for (const auto& cluster : clusters) { @@ -369,14 +347,15 @@ void CMultimodalPrior::addSamples(const TWeightStyleVec& weightStyles_, m_Modes.emplace_back(cluster.first, m_SeedPrior); k = m_Modes.end() - 1; } - weight[0][count] = cluster.second; - if (winsorisation != missing) { - double& ww = weight[0][winsorisation]; + maths_t::setCount(cluster.second, weight[0]); + if (maths_t::isWinsorised(weight)) { + double ww = maths_t::winsorisationWeight(weight[0]); double f = (k->weight() + cluster.second) / Z; - ww = std::max(1.0 - (1.0 - ww) / f, ww * f); + maths_t::setWinsorisationWeight( + std::max(1.0 - (1.0 - ww) / f, ww * f), weight[0]); } - k->s_Prior->addSamples(weightStyles, sample, weight); - n += maths_t::countForUpdate(weightStyles, weight[0]); + k->s_Prior->addSamples(sample, weight); + n += maths_t::countForUpdate(weight[0]); } this->addSamples(n); } @@ -390,7 +369,6 @@ void CMultimodalPrior::propagateForwardsByTime(double time) { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // Nothing to be done. 
return; @@ -421,6 +399,7 @@ double CMultimodalPrior::marginalLikelihoodMean() const { } double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const { + if (m_Modes.empty()) { return 0.0; } @@ -438,53 +417,47 @@ double CMultimodalPrior::nearestMarginalLikelihoodMean(double value) const { return result; } -double CMultimodalPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return CMultimodalPriorUtils::marginalLikelihoodMode(m_Modes, weightStyles, weights); +double CMultimodalPrior::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + return CMultimodalPriorUtils::marginalLikelihoodMode(m_Modes, weights); } CMultimodalPrior::TDouble1Vec -CMultimodalPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +CMultimodalPrior::marginalLikelihoodModes(const TDoubleWeightsAry& weights) const { TDouble1Vec result(m_Modes.size()); for (std::size_t i = 0u; i < m_Modes.size(); ++i) { - result[i] = m_Modes[i].s_Prior->marginalLikelihoodMode(weightStyles, weights); + result[i] = m_Modes[i].s_Prior->marginalLikelihoodMode(weights); } return result; } -double CMultimodalPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return CMultimodalPriorUtils::marginalLikelihoodVariance(m_Modes, weightStyles, weights); +double CMultimodalPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + return CMultimodalPriorUtils::marginalLikelihoodVariance(m_Modes, weights); } TDoubleDoublePr CMultimodalPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { return CMultimodalPriorUtils::marginalLikelihoodConfidenceInterval( - *this, m_Modes, percentage, weightStyles, weights); + *this, m_Modes, percentage, weights); } maths_t::EFloatingPointErrorStatus -CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CMultimodalPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); return maths_t::E_FpFailed; } - if (this->isNonInformative()) { // The non-informative likelihood is improper and effectively // zero everywhere. We use minus max double because @@ -498,14 +471,15 @@ CMultimodalPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles return maths_t::E_FpOverflowed; } - return m_Modes.size() == 1 ? m_Modes[0].s_Prior->jointLogMarginalLikelihood( - weightStyles, samples, weights, result) - : CMultimodalPriorUtils::jointLogMarginalLikelihood( - m_Modes, weightStyles, samples, weights, result); + return m_Modes.size() == 1 + ? 
m_Modes[0].s_Prior->jointLogMarginalLikelihood(samples, weights, result)
+               : CMultimodalPriorUtils::jointLogMarginalLikelihood(
+                     m_Modes, samples, weights, result);
 }
 
 void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples,
                                                 TDouble1Vec& samples) const {
+
     samples.clear();
 
     if (numberSamples == 0 || this->numberSamples() == 0.0) {
@@ -515,34 +489,30 @@ void CMultimodalPrior::sampleMarginalLikelihood(std::size_t numberSamples,
     CMultimodalPriorUtils::sampleMarginalLikelihood(m_Modes, numberSamples, samples);
 }
 
-bool CMultimodalPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles,
-                                        const TDouble1Vec& samples,
-                                        const TDouble4Vec1Vec& weights,
+bool CMultimodalPrior::minusLogJointCdf(const TDouble1Vec& samples,
+                                        const TDoubleWeightsAry1Vec& weights,
                                         double& lowerBound,
                                         double& upperBound) const {
-    return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, weightStyles, samples,
-                                                   weights, lowerBound, upperBound);
+    return CMultimodalPriorUtils::minusLogJointCdf(m_Modes, samples, weights,
                                                    lowerBound, upperBound);
 }
 
-bool CMultimodalPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles,
-                                                  const TDouble1Vec& samples,
-                                                  const TDouble4Vec1Vec& weights,
+bool CMultimodalPrior::minusLogJointCdfComplement(const TDouble1Vec& samples,
+                                                  const TDoubleWeightsAry1Vec& weights,
                                                   double& lowerBound,
                                                   double& upperBound) const {
     return CMultimodalPriorUtils::minusLogJointCdfComplement(
-        m_Modes, weightStyles, samples, weights, lowerBound, upperBound);
+        m_Modes, samples, weights, lowerBound, upperBound);
 }
 
 bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
-                                                      const TWeightStyleVec& weightStyles,
                                                       const TDouble1Vec& samples,
-                                                      const TDouble4Vec1Vec& weights,
+                                                      const TDoubleWeightsAry1Vec& weights,
                                                       double& lowerBound,
                                                       double& upperBound,
                                                       maths_t::ETail& tail) const {
     return CMultimodalPriorUtils::probabilityOfLessLikelySamples(
-        *this, m_Modes, calculation, weightStyles, samples, weights, lowerBound,
-        upperBound, tail);
+        *this, m_Modes, calculation, samples, weights, lowerBound, upperBound, tail);
 }
 
 bool CMultimodalPrior::isNonInformative() const {
@@ -602,6 +572,7 @@ std::size_t CMultimodalPrior::numberModes() const {
 }
 
 bool CMultimodalPrior::checkInvariants(const std::string& tag) const {
+
     bool result = true;
 
     if (m_Modes.size() != m_Clusterer->numberClusters()) {
@@ -652,6 +623,7 @@ CMultimodalPrior::CModeSplitCallback::CModeSplitCallback(CMultimodalPrior& prior
 void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
                                                       std::size_t leftSplitIndex,
                                                       std::size_t rightSplitIndex) const {
+
     LOG_TRACE(<< "Splitting mode with index " << sourceIndex);
 
     TModeVec& modes = m_Prior->m_Modes;
@@ -682,19 +654,17 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
         }
         LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples));
 
-        double nl = pLeft * numberSamples;
-        double ns = std::min(nl, 4.0);
+        double wl = pLeft * numberSamples;
+        double ws = std::min(wl, 4.0);
         double n = static_cast<double>(samples.size());
-        LOG_TRACE(<< "# left = " << nl);
+        LOG_TRACE(<< "# left = " << wl);
 
-        double seedWeight = ns / n;
-        TDouble4Vec1Vec weights(samples.size(), TDouble4Vec{seedWeight});
-        modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
+        TDoubleWeightsAry1Vec weights(samples.size(), maths_t::countWeight(ws / n));
+        modes.back().s_Prior->addSamples(samples, weights);
 
-        double weight = (nl - ns) / n;
-        if (weight > 0.0) {
-            weights.assign(weights.size(), TDouble4Vec{weight});
-            modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
+        if (wl > ws) {
+            weights.assign(weights.size(), maths_t::countWeight((wl - ws) / n));
+            modes.back().s_Prior->addSamples(samples, weights);
             LOG_TRACE(<< modes.back().s_Prior->print());
         }
     }
@@ -708,19 +678,17 @@ void CMultimodalPrior::CModeSplitCallback::operator()(std::size_t sourceIndex,
         }
         LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples));
 
-        double nr = pRight * numberSamples;
-        double ns = std::min(nr, 4.0);
+        double wr = pRight * numberSamples;
+        double ws = std::min(wr, 4.0);
         double n = static_cast<double>(samples.size());
         LOG_TRACE(<< "# right = " << nr);
 
-        double seedWeight = ns / n;
-        TDouble4Vec1Vec weights(samples.size(), TDouble4Vec{seedWeight});
-        modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
+        TDoubleWeightsAry1Vec weights(samples.size(), maths_t::countWeight(ws / n));
+        modes.back().s_Prior->addSamples(samples, weights);
 
-        double weight = (nr - ns) / n;
-        if (weight > 0.0) {
-            weights.assign(weights.size(), TDouble4Vec{weight});
-            modes.back().s_Prior->addSamples(TWeights::COUNT, samples, weights);
+        if (wr > ws) {
+            weights.assign(weights.size(), maths_t::countWeight((wr - ws) / n));
+            modes.back().s_Prior->addSamples(samples, weights);
             LOG_TRACE(<< modes.back().s_Prior->print());
         }
     }
@@ -742,6 +710,7 @@ CMultimodalPrior::CModeMergeCallback::CModeMergeCallback(CMultimodalPrior& prior
 void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex,
                                                       std::size_t rightMergeIndex,
                                                       std::size_t targetIndex) const {
+
     LOG_TRACE(<< "Merging modes with indices " << leftMergeIndex << " " << rightMergeIndex);
 
     TModeVec& modes = m_Prior->m_Modes;
@@ -751,7 +720,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
     double wl = 0.0;
     double wr = 0.0;
-    double n = 0.0;
+    double w = 0.0;
     std::size_t nl = 0;
     std::size_t nr = 0;
     TDouble1Vec samples;
@@ -760,7 +729,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
                                      CSetTools::CIndexInSet(leftMergeIndex));
     if (leftMode != modes.end()) {
         wl = leftMode->s_Prior->numberSamples();
-        n += wl;
+        w += wl;
         TDouble1Vec leftSamples;
         leftMode->s_Prior->sampleMarginalLikelihood(MODE_MERGE_NUMBER_SAMPLES, leftSamples);
         nl = leftSamples.size();
@@ -773,7 +742,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
                                       CSetTools::CIndexInSet(rightMergeIndex));
     if (rightMode != modes.end()) {
         wr = rightMode->s_Prior->numberSamples();
-        n += wr;
+        w += wr;
         TDouble1Vec rightSamples;
         rightMode->s_Prior->sampleMarginalLikelihood(MODE_MERGE_NUMBER_SAMPLES, rightSamples);
         nr = rightSamples.size();
@@ -782,7 +751,7 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
         LOG_ERROR(<< "Couldn't find mode for " << rightMergeIndex);
     }
 
-    if (n > 0.0) {
+    if (w > 0.0) {
         double nl_ = static_cast<double>(nl);
         double nr_ = static_cast<double>(nr);
         double Z = (nl_ * wl + nr_ * wr) / (nl_ + nr_);
@@ -791,24 +760,22 @@ void CMultimodalPrior::CModeMergeCallback::operator()(std::size_t leftMergeIndex
         }
         LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples));
-        LOG_TRACE(<< "n = " << n << ", wl = " << wl << ", wr = " << wr);
+        LOG_TRACE(<< "w = " << w << ", wl = " << wl << ", wr = " << wr);
 
-        double ns = std::min(n, 4.0);
-        double s = static_cast<double>(samples.size());
+        double ws = std::min(w, 4.0);
+        double n = static_cast<double>(samples.size());
 
-        double seedWeight = ns / s;
-        TDouble4Vec1Vec weights;
weights.reserve(samples.size()); - weights.resize(nl, TDouble1Vec{wl * seedWeight}); - weights.resize(nl + nr, TDouble1Vec{wr * seedWeight}); - newMode.s_Prior->addSamples(TWeights::COUNT, samples, weights); - - double weight = (n - ns) / s; - if (weight > 0.0) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - weights[i][0] *= weight / seedWeight; - } - newMode.s_Prior->addSamples(TWeights::COUNT, samples, weights); + weights.resize(nl, maths_t::countWeight(wl * ws / n)); + weights.resize(nl + nr, maths_t::countWeight(wr * ws / n)); + newMode.s_Prior->addSamples(samples, weights); + + if (w > ws) { + weights.clear(); + weights.resize(nl, maths_t::countWeight(wl * (w - ws) / n)); + weights.resize(nl + nr, maths_t::countWeight(wr * (w - ws) / n)); + newMode.s_Prior->addSamples(samples, weights); } // Remove the merged modes. diff --git a/lib/maths/CMultinomialConjugate.cc b/lib/maths/CMultinomialConjugate.cc index ff3a6f2b3b..1761c70163 100644 --- a/lib/maths/CMultinomialConjugate.cc +++ b/lib/maths/CMultinomialConjugate.cc @@ -360,9 +360,8 @@ bool CMultinomialConjugate::needsOffset() const { return false; } -double CMultinomialConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, - const TDouble1Vec& /*samples*/, - const TDouble4Vec1Vec& /*weights*/) { +double CMultinomialConjugate::adjustOffset(const TDouble1Vec& /*samples*/, + const TDoubleWeightsAry1Vec& /*weights*/) { return 1.0; } @@ -370,13 +369,11 @@ double CMultinomialConjugate::offset() const { return 0.0; } -void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CMultinomialConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -384,7 +381,7 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, return; } - this->CPrior::addSamples(weightStyles, samples, weights); + this->CPrior::addSamples(samples, weights); // If x = {x(i)} denotes the sample vector, then x are multinomially // distributed with probabilities {p(i)}. Let n(i) denote the counts @@ -416,7 +413,7 @@ void CMultinomialConjugate::addSamples(const TWeightStyleVec& weightStyles, LOG_ERROR(<< "Discarding " << x); continue; } - double n = maths_t::countForUpdate(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); if (!CMathsFuncs::isFinite(n)) { LOG_ERROR(<< "Bad count weight " << n); continue; @@ -496,16 +493,18 @@ void CMultinomialConjugate::propagateForwardsByTime(double time) { } CMultinomialConjugate::TDoubleDoublePr CMultinomialConjugate::marginalLikelihoodSupport() const { + // Strictly speaking for a particular likelihood this is the // set of discrete values or categories, but we are interested // in the support for the possible discrete values which can // be any real numbers. 
-    return std::make_pair(boost::numeric::bounds<double>::lowest(),
-                          boost::numeric::bounds<double>::highest());
+    return {boost::numeric::bounds<double>::lowest(),
+            boost::numeric::bounds<double>::highest()};
 }
 
 double CMultinomialConjugate::marginalLikelihoodMean() const {
+
     if (this->isNonInformative()) {
         return 0.0;
     }
@@ -524,19 +523,17 @@ double CMultinomialConjugate::marginalLikelihoodMean() const {
     return CBasicStatistics::mean(result);
 }
 
-double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/,
-                                                     const TDouble4Vec& /*weights*/) const {
+double CMultinomialConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const {
+
     if (this->isNonInformative()) {
         return 0.0;
    }
 
     // This is just the category with the maximum concentration.
-    double modeConcentration = m_Concentrations[0];
     std::size_t mode = 0u;
     for (std::size_t i = 1u; i < m_Concentrations.size(); ++i) {
-        if (m_Concentrations[i] > modeConcentration) {
-            modeConcentration = m_Concentrations[i];
+        if (m_Concentrations[i] > m_Concentrations[mode]) {
             mode = i;
         }
     }
@@ -544,8 +541,8 @@ double CMultinomialConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we
     return m_Categories[mode];
 }
 
-double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec& /*weightStyles*/,
-                                                         const TDouble4Vec& /*weights*/) const {
+double CMultinomialConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& /*weights*/) const {
+
     using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
 
     if (this->isNonInformative()) {
@@ -568,8 +565,8 @@ double CMultinomialConjugate::marginalLikelihoodVariance(const TWeightStyleVec&
 CMultinomialConjugate::TDoubleDoublePr
 CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
-                                                            const TWeightStyleVec& /*weightStyles*/,
-                                                            const TDouble4Vec& /*weights*/) const {
+                                                            const TDoubleWeightsAry& /*weights*/) const {
+
     if (this->isNonInformative()) {
         return this->marginalLikelihoodSupport();
     }
@@ -606,13 +603,12 @@ CMultinomialConjugate::marginalLikelihoodConfidenceInterval(double percentage,
     LOG_TRACE(<< "quantiles = " << core::CContainerPrinter::print(quantiles));
     LOG_TRACE(<< " " << core::CContainerPrinter::print(m_Categories));
 
-    return std::make_pair(x1, x2);
+    return {x1, x2};
 }
 
 maths_t::EFloatingPointErrorStatus
-CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles,
-                                                  const TDouble1Vec& samples,
-                                                  const TDouble4Vec1Vec& weights,
+CMultinomialConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples,
+                                                  const TDoubleWeightsAry1Vec& weights,
                                                   double& result) const {
     result = 0.0;
@@ -620,14 +616,12 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS
         LOG_ERROR(<< "Can't compute likelihood for empty sample set");
         return maths_t::E_FpFailed;
     }
-
     if (samples.size() != weights.size()) {
         LOG_ERROR(<< "Mismatch in samples '"
                   << core::CContainerPrinter::print(samples) << "' and weights '"
                  << core::CContainerPrinter::print(weights) << "'");
         return maths_t::E_FpFailed;
     }
-
     if (this->isNonInformative()) {
         // The non-informative likelihood is improper and effectively
         // zero everywhere.
We use minus max double because @@ -666,7 +660,7 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS double numberSamples = 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); numberSamples += n; categoryCounts[samples[i]] += n; } @@ -716,6 +710,7 @@ CMultinomialConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { + samples.clear(); if (numberSamples == 0 || this->isNonInformative()) { @@ -759,9 +754,8 @@ void CMultinomialConjugate::sampleMarginalLikelihood(std::size_t numberSamples, LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); } -bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CMultinomialConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { lowerBound = upperBound = 0.0; @@ -803,7 +797,7 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound; double sampleUpperBound; @@ -822,11 +816,11 @@ bool CMultinomialConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles return true; } -bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CMultinomialConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + // See minusLogJointCdf for the rationale behind this approximation. detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration); @@ -835,7 +829,7 @@ bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound; double sampleUpperBound; @@ -855,9 +849,8 @@ bool CMultinomialConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we } bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { @@ -898,7 +891,7 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability detail::CCdf cdf(m_Categories, m_Concentrations, m_TotalConcentration); for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound, sampleUpperBound; cdf(x, sampleLowerBound, sampleUpperBound); jointLowerBound.add(sampleLowerBound, n); @@ -1130,7 +1123,7 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability // Count the occurrences of each category in the sample set. 
for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); categoryCounts[x] += n; } @@ -1179,7 +1172,7 @@ bool CMultinomialConjugate::probabilityOfLessLikelySamples(maths_t::EProbability detail::CCdfComplement cdfComplement(m_Categories, m_Concentrations, m_TotalConcentration); for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double sampleLowerBound, sampleUpperBound; cdfComplement(x, sampleLowerBound, sampleUpperBound); jointLowerBound.add(sampleLowerBound, n); @@ -1512,7 +1505,7 @@ void CMultinomialConjugate::probabilitiesOfLessLikelyCategories(maths_t::EProbab CMultinomialConjugate::TDoubleDoublePrVec CMultinomialConjugate::confidenceIntervalProbabilities(double percentage) const { if (this->isNonInformative()) { - return TDoubleDoublePrVec(m_Concentrations.size(), std::make_pair(0.0, 1.0)); + return TDoubleDoublePrVec(m_Concentrations.size(), {0.0, 1.0}); } // The marginal distribution over each probability is beta. diff --git a/lib/maths/CMultivariateConstantPrior.cc b/lib/maths/CMultivariateConstantPrior.cc index 1f1e04cb56..ce29f91385 100644 --- a/lib/maths/CMultivariateConstantPrior.cc +++ b/lib/maths/CMultivariateConstantPrior.cc @@ -106,14 +106,12 @@ void CMultivariateConstantPrior::setToNonInformative(double /*offset*/, double / m_Constant.reset(); } -void CMultivariateConstantPrior::adjustOffset(const TWeightStyleVec& /*weightStyle*/, - const TDouble10Vec1Vec& /*samples*/, - const TDouble10Vec4Vec1Vec& /*weights*/) { +void CMultivariateConstantPrior::adjustOffset(const TDouble10Vec1Vec& /*samples*/, + const TDouble10VecWeightsAry1Vec& /*weights*/) { } -void CMultivariateConstantPrior::addSamples(const TWeightStyleVec& /*weightStyle*/, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& /*weights*/) { +void CMultivariateConstantPrior::addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& /*weights*/) { if (m_Constant || samples.empty()) { return; } @@ -195,8 +193,7 @@ CMultivariateConstantPrior::marginalLikelihoodMean() const { } CMultivariateConstantPrior::TDouble10Vec -CMultivariateConstantPrior::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble10Vec4Vec& /*weights*/) const { +CMultivariateConstantPrior::marginalLikelihoodMode(const TDouble10VecWeightsAry& /*weights*/) const { return this->marginalLikelihoodMean(); } @@ -219,9 +216,8 @@ CMultivariateConstantPrior::marginalLikelihoodVariances() const { } maths_t::EFloatingPointErrorStatus -CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, +CMultivariateConstantPrior::jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -263,8 +259,7 @@ CMultivariateConstantPrior::jointLogMarginalLikelihood(const TWeightStyleVec& we return maths_t::E_FpOverflowed; } - numberSamples += this->smallest( - maths_t::countForUpdate(m_Dimension, weightStyles, weights[i])); + numberSamples += this->smallest(maths_t::countForUpdate(weights[i])); } result = numberSamples * core::constants::LOG_MAX_DOUBLE; diff --git a/lib/maths/CMultivariateMultimodalPrior.cc b/lib/maths/CMultivariateMultimodalPrior.cc index 4b4db72c2e..269c031063 100644 --- 
a/lib/maths/CMultivariateMultimodalPrior.cc +++ b/lib/maths/CMultivariateMultimodalPrior.cc @@ -25,8 +25,8 @@ namespace maths { namespace multivariate_multimodal_prior_detail { using TDoubleVec = std::vector; -using TDouble10Vec = CMultivariatePrior::TDouble10Vec; -using TDouble10Vec4Vec = CMultivariatePrior::TDouble10Vec4Vec; +using TDouble10Vec1Vec = CMultivariatePrior::TDouble10Vec1Vec; +using TDouble10VecWeightsAry1Vec = CMultivariatePrior::TDouble10VecWeightsAry1Vec; namespace { @@ -47,9 +47,8 @@ std::string printIndices(const TModeVec& modes) { maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const TModeVec& modes, - const maths_t::TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& sample, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, TSizeDoublePr3Vec& modeLogLikelihoods, double& result) { try { @@ -60,14 +59,14 @@ jointLogMarginalLikelihood(const TModeVec& modes, for (std::size_t i = 0u; i < modes.size(); ++i) { double modeLogLikelihood; - maths_t::EFloatingPointErrorStatus status = modes[i].s_Prior->jointLogMarginalLikelihood( - weightStyles, sample, weights, modeLogLikelihood); + maths_t::EFloatingPointErrorStatus status = + modes[i].s_Prior->jointLogMarginalLikelihood(sample, weights, modeLogLikelihood); if (status & maths_t::E_FpFailed) { // Logging handled at a lower level. return status; } if (!(status & maths_t::E_FpOverflowed)) { - modeLogLikelihoods.push_back({i, modeLogLikelihood}); + modeLogLikelihoods.emplace_back(i, modeLogLikelihood); maxLogLikelihood = std::max(maxLogLikelihood, modeLogLikelihood); } } @@ -160,9 +159,10 @@ void sampleMarginalLikelihood(const TModeVec& modes, } void print(const TModeVec& modes, const std::string& separator, std::string& result) { - double Z = std::accumulate( - modes.begin(), modes.end(), 0.0, - [](double sum, const TMode& mode) { return sum + mode.weight(); }); + auto addWeight = [](double sum, const TMode& mode) { + return sum + mode.weight(); + }; + double Z = std::accumulate(modes.begin(), modes.end(), 0.0, addWeight); std::string separator_ = separator + separator; @@ -191,7 +191,7 @@ void modeMergeCallback(std::size_t dimension, double wl = 0.0; double wr = 0.0; - double n = 0.0; + double w = 0.0; std::size_t nl = 0; std::size_t nr = 0; TDouble10Vec1Vec samples; @@ -200,7 +200,7 @@ void modeMergeCallback(std::size_t dimension, CSetTools::CIndexInSet(leftMergeIndex)); if (leftMode != modes.end()) { wl = leftMode->s_Prior->numberSamples(); - n += wl; + w += wl; TDouble10Vec1Vec leftSamples; leftMode->s_Prior->sampleMarginalLikelihood(numberSamples, leftSamples); nl = leftSamples.size(); @@ -215,7 +215,7 @@ void modeMergeCallback(std::size_t dimension, CSetTools::CIndexInSet(rightMergeIndex)); if (rightMode != modes.end()) { wr = rightMode->s_Prior->numberSamples(); - n += wr; + w += wr; TDouble10Vec1Vec rightSamples; rightMode->s_Prior->sampleMarginalLikelihood(numberSamples, rightSamples); nr = rightSamples.size(); @@ -226,7 +226,7 @@ void modeMergeCallback(std::size_t dimension, << ", merged index = " << targetIndex); } - if (n > 0.0) { + if (w > 0.0) { double nl_ = static_cast(nl); double nr_ = static_cast(nr); double Z = (nl_ * wl + nr_ * wr) / (nl_ + nr_); @@ -235,37 +235,30 @@ void modeMergeCallback(std::size_t dimension, } LOG_TRACE(<< "samples = " << core::CContainerPrinter::print(samples)); - LOG_TRACE(<< "n = " << n << ", wl = " << wl << ", wr = " << wr); + LOG_TRACE(<< "w = " << w << ", wl = " << wl << ", wr = " << wr); - double ns = std::min(n, 4.0); - double s = 
static_cast(samples.size()); + double ws = std::min(w, 4.0); + double n = static_cast(samples.size()); - TDouble10Vec leftSeedWeight(dimension, wl * ns / s); - TDouble10Vec rightSeedWeight(dimension, wl * ns / s); - TDouble10Vec4Vec1Vec weights; + TDouble10VecWeightsAry1Vec weights; weights.reserve(samples.size()); - weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight)); - weights.resize(nl + nr, TDouble10Vec1Vec(1, rightSeedWeight)); - newMode.s_Prior->addSamples(CConstantWeights::COUNT, samples, weights); - - double weight = (n - ns) / s; - if (weight > 0.0) { - for (std::size_t i = 0u; i < dimension; ++i) { - leftSeedWeight[i] = wl * weight; - rightSeedWeight[i] = wr * weight; - } + weights.resize(nl, maths_t::countWeight(wl * ws / n, dimension)); + weights.resize(nl + nr, maths_t::countWeight(wr * ws / n, dimension)); + newMode.s_Prior->addSamples(samples, weights); + + if (w > ws) { weights.clear(); - weights.resize(nl, TDouble10Vec1Vec(1, leftSeedWeight)); - weights.resize(nl + nr, TDouble10Vec1Vec(1, rightSeedWeight)); - newMode.s_Prior->addSamples(CConstantWeights::COUNT, samples, weights); + weights.resize(nl, maths_t::countWeight(wl * (w - ws) / n, dimension)); + weights.resize(nl + nr, maths_t::countWeight(wr * (w - ws) / n, dimension)); + newMode.s_Prior->addSamples(samples, weights); } // Remove the merged modes. TSizeSet mergedIndices; mergedIndices.insert(leftMergeIndex); mergedIndices.insert(rightMergeIndex); - modes.erase(std::remove_if(modes.begin(), modes.end(), CSetTools::CIndexInSet(mergedIndices)), - modes.end()); + auto isMergeIndex = CSetTools::CIndexInSet(mergedIndices); + modes.erase(std::remove_if(modes.begin(), modes.end(), isMergeIndex), modes.end()); // Add the new mode. LOG_TRACE(<< "Creating mode with index " << targetIndex); diff --git a/lib/maths/CMultivariateOneOfNPrior.cc b/lib/maths/CMultivariateOneOfNPrior.cc index c368ec2ec5..79a4ddd708 100644 --- a/lib/maths/CMultivariateOneOfNPrior.cc +++ b/lib/maths/CMultivariateOneOfNPrior.cc @@ -50,7 +50,7 @@ using TDouble10Vec = CMultivariateOneOfNPrior::TDouble10Vec; using TDouble10VecDouble10VecPr = CMultivariateOneOfNPrior::TDouble10VecDouble10VecPr; using TDouble10Vec1Vec = CMultivariateOneOfNPrior::TDouble10Vec1Vec; using TDouble10Vec10Vec = CMultivariateOneOfNPrior::TDouble10Vec10Vec; -using TDouble10Vec4Vec1Vec = CMultivariateOneOfNPrior::TDouble10Vec4Vec1Vec; +using TDouble10VecWeightsAry1Vec = CMultivariateOneOfNPrior::TDouble10VecWeightsAry1Vec; using TPriorPtr = CMultivariateOneOfNPrior::TPriorPtr; using TWeightPriorPtrPr = CMultivariateOneOfNPrior::TWeightPriorPtrPr; using TWeightPriorPtrPrVec = CMultivariateOneOfNPrior::TWeightPriorPtrPrVec; @@ -296,17 +296,15 @@ void CMultivariateOneOfNPrior::setToNonInformative(double offset, double decayRa this->numberSamples(0.0); } -void CMultivariateOneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { +void CMultivariateOneOfNPrior::adjustOffset(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { for (auto& model : m_Models) { - model.second->adjustOffset(weightStyles, samples, weights); + model.second->adjustOffset(samples, weights); } } -void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) { +void CMultivariateOneOfNPrior::addSamples(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights) { if (samples.empty()) { return; } 
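// Illustrative sketch (not part of the patch): the mode merge callbacks above seed
// the merged mode with count weights in two passes. With w = wl + wr the number of
// samples represented by the two source modes, at most ws = min(w, 4) of that count
// is added in a first pass and the remainder (w - ws) in a second pass, each spread
// over the n = nl + nr samples resampled from the two modes. A minimal standalone
// version of that arithmetic, using plain doubles in place of the maths_t weight
// arrays (the values below are made up):
//
// #include <algorithm>
// #include <cstddef>
// #include <iostream>
// #include <vector>
//
// int main() {
//     double wl = 30.0, wr = 10.0;  // counts represented by the left/right modes
//     std::size_t nl = 20, nr = 20; // samples resampled from each mode
//     double w = wl + wr;
//     double ws = std::min(w, 4.0);            // seed pass budget
//     double n = static_cast<double>(nl + nr); // total resampled samples
//
//     std::vector<double> countWeights(nl + nr);
//     // Seed pass: per-sample count weights wl * ws / n and wr * ws / n.
//     std::fill_n(countWeights.begin(), nl, wl * ws / n);
//     std::fill_n(countWeights.begin() + nl, nr, wr * ws / n);
//     // ... first addSamples(samples, countWeights) call happens here ...
//
//     // Remainder pass: add back the rest of the count, if any.
//     if (w > ws) {
//         std::fill_n(countWeights.begin(), nl, wl * (w - ws) / n);
//         std::fill_n(countWeights.begin() + nl, nr, wr * (w - ws) / n);
//         // ... second addSamples(samples, countWeights) call happens here ...
//     }
//     std::cout << "seed weight for a left sample = " << wl * ws / n << '\n';
// }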
@@ -314,10 +312,10 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); + this->adjustOffset(samples, weights); double penalty = CTools::fastLog(this->numberSamples()); - this->CMultivariatePrior::addSamples(weightStyles, samples, weights); + this->CMultivariatePrior::addSamples(samples, weights); penalty = (penalty - CTools::fastLog(this->numberSamples())) / 2.0; // See COneOfNPrior::addSamples for a discussion. @@ -339,8 +337,7 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status = - use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, - weights, logLikelihood) + use ? model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood) : maths_t::E_FpOverflowed; if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Failed to compute log-likelihood"); @@ -355,7 +352,7 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, } // Update the component prior distribution. - model.second->addSamples(weightStyles, samples, weights); + model.second->addSamples(samples, weights); used.push_back(use); uses.push_back(model.second->participatesInModelSelection()); @@ -363,13 +360,8 @@ void CMultivariateOneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, } TDouble10Vec n(m_Dimension, 0.0); - try { - for (const auto& weight : weights) { - add(maths_t::count(m_Dimension, weightStyles, weight), n); - } - } catch (std::exception& e) { - LOG_ERROR(<< "Failed to add samples: " << e.what()); - return; + for (const auto& weight : weights) { + add(maths_t::count(weight), n); } if (!isNonInformative && maxLogLikelihood.count() > 0) { @@ -455,9 +447,8 @@ CMultivariateOneOfNPrior::univariate(const TSize10Vec& marginalize, models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z; } - return std::make_pair(TUnivariatePriorPtr(new COneOfNPrior( - models, this->dataType(), this->decayRate())), - maxWeight.count() > 0 ? maxWeight[0] : 0.0); + return {TUnivariatePriorPtr(new COneOfNPrior(models, this->dataType(), this->decayRate())), + maxWeight.count() > 0 ? maxWeight[0] : 0.0}; } CMultivariateOneOfNPrior::TPriorPtrDoublePr @@ -489,12 +480,13 @@ CMultivariateOneOfNPrior::bivariate(const TSize10Vec& marginalize, models[i].first *= std::exp(weights[i] - maxWeight[0]) / Z; } - return std::make_pair(TPriorPtr(new CMultivariateOneOfNPrior( - 2, models, this->dataType(), this->decayRate())), - maxWeight.count() > 0 ? maxWeight[0] : 0.0); + return {TPriorPtr(new CMultivariateOneOfNPrior(2, models, this->dataType(), + this->decayRate())), + maxWeight.count() > 0 ? maxWeight[0] : 0.0}; } TDouble10VecDouble10VecPr CMultivariateOneOfNPrior::marginalLikelihoodSupport() const { + // We define this is as the intersection of the component model // supports. @@ -513,6 +505,7 @@ TDouble10VecDouble10VecPr CMultivariateOneOfNPrior::marginalLikelihoodSupport() } TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const { + // This is E_{P(i)}[ E[X | P(i)] ] and the conditional expectation // is just the individual model expectation. 
Note we exclude models // with low weight because typically the means are similar between @@ -532,6 +525,7 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodMean() const { TDouble10Vec CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& value) const { + // See marginalLikelihoodMean for discussion. TDouble10Vec result(m_Dimension, 0.0); @@ -546,6 +540,7 @@ CMultivariateOneOfNPrior::nearestMarginalLikelihoodMean(const TDouble10Vec& valu } TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const { + TDouble10Vec10Vec result(m_Dimension, TDouble10Vec(m_Dimension, 0.0)); if (this->isNonInformative()) { for (std::size_t i = 0u; i < m_Dimension; ++i) { @@ -571,6 +566,7 @@ TDouble10Vec10Vec CMultivariateOneOfNPrior::marginalLikelihoodCovariance() const } TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const { + if (this->isNonInformative()) { return TDouble10Vec(m_Dimension, INF); } @@ -587,24 +583,23 @@ TDouble10Vec CMultivariateOneOfNPrior::marginalLikelihoodVariances() const { } TDouble10Vec -CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const { +CMultivariateOneOfNPrior::marginalLikelihoodMode(const TDouble10VecWeightsAry& weights) const { + // We approximate this as the weighted average of the component // model modes. // Declared outside the loop to minimize the number of times // it is created. TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec sampleWeights(1, weights); + TDouble10VecWeightsAry1Vec sampleWeights(1, weights); TDouble10Vec result(m_Dimension, 0.0); double w = 0.0; for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { - sample[0] = model.second->marginalLikelihoodMode(weightStyles, weights); + sample[0] = model.second->marginalLikelihoodMode(weights); double logLikelihood; - model.second->jointLogMarginalLikelihood(weightStyles, sample, - sampleWeights, logLikelihood); + model.second->jointLogMarginalLikelihood(sample, sampleWeights, logLikelihood); updateMean(sample[0], model.first * std::exp(logLikelihood), result, w); } } @@ -614,9 +609,8 @@ CMultivariateOneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightSt } maths_t::EFloatingPointErrorStatus -CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, +CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TDouble10Vec1Vec& samples, + const TDouble10VecWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -639,8 +633,8 @@ CMultivariateOneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weig for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double logLikelihood; - maths_t::EFloatingPointErrorStatus status = model.second->jointLogMarginalLikelihood( - weightStyles, samples, weights, logLikelihood); + maths_t::EFloatingPointErrorStatus status = + model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood); if (status & maths_t::E_FpFailed) { return status; } @@ -719,7 +713,7 @@ void CMultivariateOneOfNPrior::sampleMarginalLikelihood(std::size_t numberSample for (std::size_t i = 0u; i < m_Models.size(); ++i) { modelSamples.clear(); m_Models[i].second->sampleMarginalLikelihood(sampling[i], modelSamples); - for (auto sample : modelSamples) { + for (const auto& sample : modelSamples) { samples.push_back(CTools::truncate(sample, support.first, 
support.second)); } } diff --git a/lib/maths/CMultivariatePrior.cc b/lib/maths/CMultivariatePrior.cc index fabf45946b..15677a584d 100644 --- a/lib/maths/CMultivariatePrior.cc +++ b/lib/maths/CMultivariatePrior.cc @@ -92,20 +92,15 @@ void CMultivariatePrior::decayRate(double value) { setDecayRate(value, FALLBACK_DECAY_RATE, m_DecayRate); } -void CMultivariatePrior::addSamples(const TWeightStyleVec& weightStyles, - const TDouble10Vec1Vec& /*samples*/, - const TDouble10Vec4Vec1Vec& weights) { +void CMultivariatePrior::addSamples(const TDouble10Vec1Vec& /*samples*/, + const TDouble10VecWeightsAry1Vec& weights) { std::size_t d = this->dimension(); TDouble10Vec n(d, 0.0); - try { - for (std::size_t i = 0u; i < weights.size(); ++i) { - TDouble10Vec wi = maths_t::countForUpdate(d, weightStyles, weights[i]); - for (std::size_t j = 0u; j < d; ++j) { - n[j] += wi[j]; - } + for (const auto& weight : weights) { + TDouble10Vec n_ = maths_t::countForUpdate(weight); + for (std::size_t i = 0u; i < d; ++i) { + n[i] += n_[i]; } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to extract sample counts: " << e.what()); } this->addSamples(smallest(n)); } @@ -116,15 +111,13 @@ CMultivariatePrior::nearestMarginalLikelihoodMean(const TDouble10Vec& /*value*/) } CMultivariatePrior::TDouble10Vec1Vec -CMultivariatePrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) const { - return TDouble10Vec1Vec{this->marginalLikelihoodMode(weightStyles, weights)}; +CMultivariatePrior::marginalLikelihoodModes(const TDouble10VecWeightsAry& weights) const { + return TDouble10Vec1Vec{this->marginalLikelihoodMode(weights)}; } bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, const TSize10Vec& coordinates, TDouble10Vec2Vec& lowerBounds, TDouble10Vec2Vec& upperBounds, @@ -149,18 +142,17 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } using TDouble1Vec = core::CSmallVector; - using TDouble4Vec = core::CSmallVector; - using TDouble4Vec1Vec = core::CSmallVector; + using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TJointProbabilityOfLessLikelySamplesVec = core::CSmallVector; static const TSize10Vec NO_MARGINS; static const TSizeDoublePr10Vec NO_CONDITIONS; - TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2] = { + TJointProbabilityOfLessLikelySamplesVec lowerBounds_[2]{ TJointProbabilityOfLessLikelySamplesVec(coordinates.size()), TJointProbabilityOfLessLikelySamplesVec(coordinates.size())}; - TJointProbabilityOfLessLikelySamplesVec upperBounds_[2] = { + TJointProbabilityOfLessLikelySamplesVec upperBounds_[2]{ TJointProbabilityOfLessLikelySamplesVec(coordinates.size()), TJointProbabilityOfLessLikelySamplesVec(coordinates.size())}; @@ -168,7 +160,7 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal TSize10Vec marginalize(d - 1); TSizeDoublePr10Vec condition(d - 1); TDouble1Vec sc(1); - TDouble4Vec1Vec wc{TDouble4Vec(weightStyles.size())}; + TDoubleWeightsAry1Vec wc(1); for (std::size_t i = 0; i < coordinates.size(); ++i) { std::size_t coordinate = coordinates[i]; @@ -196,8 +188,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal double lb[2], ub[2]; maths_t::ETail tc[2]; - if (!margin->probabilityOfLessLikelySamples( - calculation, weightStyles, sc, 
wc, lb[0], ub[0], tc[0])) { + if (!margin->probabilityOfLessLikelySamples(calculation, sc, wc, + lb[0], ub[0], tc[0])) { LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinate); return false; } @@ -206,8 +198,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal TUnivariatePriorPtr conditional( this->univariate(NO_MARGINS, condition).first); - if (!conditional->probabilityOfLessLikelySamples( - calculation, weightStyles, sc, wc, lb[1], ub[1], tc[1])) { + if (!conditional->probabilityOfLessLikelySamples(calculation, sc, wc, + lb[1], ub[1], tc[1])) { LOG_ERROR(<< "Failed to compute probability for coordinate " << coordinate); return false; } @@ -236,9 +228,8 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal } bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights, + const TDouble10VecWeightsAry1Vec& weights, double& lowerBound, double& upperBound, TTail10Vec& tail) const { @@ -256,13 +247,13 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal std::iota(coordinates.begin(), coordinates.end(), 0); TDouble10Vec1Vec sample(1); - TDouble10Vec4Vec1Vec weight(1); + TDouble10VecWeightsAry1Vec weight(1); TDouble10Vec2Vec lbs; TDouble10Vec2Vec ubs; for (std::size_t i = 0u; i < samples.size(); ++i) { sample[0] = samples[i]; weight[0] = weights[i]; - if (!this->probabilityOfLessLikelySamples(calculation, weightStyles, sample, weight, + if (!this->probabilityOfLessLikelySamples(calculation, sample, weight, coordinates, lbs, ubs, tail)) { return false; } @@ -290,6 +281,7 @@ bool CMultivariatePrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCal std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, std::size_t y) const { + // We'll plot the marginal likelihood function over a range where // most of the mass is, i.e. the 99% confidence interval. 
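// Illustrative sketch (not part of the patch): printMarginalLikelihoodFunction, as
// the comment above says, tabulates the likelihood on a regular grid covering the
// region holding most of the mass (the central 99% interval in each coordinate) and
// writes it out as an octave/matlab-style matrix. A standalone version of that
// pattern for an independent bivariate Gaussian (all names and values here are
// illustrative, not the library's API):
//
// #include <cmath>
// #include <cstddef>
// #include <iostream>
//
// int main() {
//     const std::size_t POINTS = 5; // the real code uses a finer grid
//     double meanX = 0.0, sdX = 1.0, meanY = 2.0, sdY = 0.5;
//     const double Z99 = 2.5758; // two-sided 99% quantile of the standard normal
//
//     double x0 = meanX - Z99 * sdX, dx = 2.0 * Z99 * sdX / (POINTS - 1);
//     double y0 = meanY - Z99 * sdY, dy = 2.0 * Z99 * sdY / (POINTS - 1);
//
//     auto pdf = [](double u, double m, double s) {
//         return std::exp(-0.5 * (u - m) * (u - m) / (s * s)) /
//                (s * std::sqrt(2.0 * 3.141592653589793));
//     };
//
//     std::cout << "likelihood = [";
//     for (std::size_t i = 0; i < POINTS; ++i) {
//         for (std::size_t j = 0; j < POINTS; ++j) {
//             double x = x0 + static_cast<double>(i) * dx;
//             double y = y0 + static_cast<double>(j) * dy;
//             std::cout << pdf(x, meanX, sdX) * pdf(y, meanY, sdY) << " ";
//         }
//         std::cout << "\n";
//     }
//     std::cout << "];\n";
// }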
@@ -351,8 +343,8 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, yabscissa << "];" << core_t::LINE_ENDING; likelihood << "likelihood = ["; - TDouble10Vec1Vec sample(1, TDouble10Vec(2)); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); + TDouble10Vec1Vec sample{TDouble10Vec(2)}; + TDouble10VecWeightsAry1Vec weight(TWeights::singleUnit(2)); x_ = xRange.first; for (std::size_t i = 0u; i < POINTS; ++i, x_ += dx) { y_ = yRange.first; @@ -360,7 +352,7 @@ std::string CMultivariatePrior::printMarginalLikelihoodFunction(std::size_t x, sample[0][0] = x_; sample[0][1] = y_; double l; - xyMargin->jointLogMarginalLikelihood(CConstantWeights::COUNT, sample, weight, l); + xyMargin->jointLogMarginalLikelihood(sample, weight, l); likelihood << std::exp(l) << " "; } likelihood << core_t::LINE_ENDING; @@ -412,7 +404,7 @@ void CMultivariatePrior::addSamples(double n) { } bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples, - const TDouble10Vec4Vec1Vec& weights) const { + const TDouble10VecWeightsAry1Vec& weights) const { if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '" << weights << "'"); @@ -435,16 +427,14 @@ bool CMultivariatePrior::check(const TDouble10Vec1Vec& samples, bool CMultivariatePrior::check(const TSize10Vec& marginalize, const TSizeDoublePr10Vec& condition) const { - static const auto FIRST = [](const TSizeDoublePr& pair) { - return pair.first; - }; + const auto first = [](const TSizeDoublePr& pair) { return pair.first; }; std::size_t d = this->dimension(); if ((marginalize.size() > 0 && marginalize.back() >= d) || (condition.size() > 0 && condition.back().first >= d) || CSetTools::setIntersectSize( marginalize.begin(), marginalize.end(), - boost::make_transform_iterator(condition.begin(), FIRST), - boost::make_transform_iterator(condition.end(), FIRST)) != 0) { + boost::make_transform_iterator(condition.begin(), first), + boost::make_transform_iterator(condition.end(), first)) != 0) { LOG_ERROR(<< "Invalid variables for computing univariate distribution: " << "marginalize '" << marginalize << "'" << ", condition '" << condition << "'"); diff --git a/lib/maths/CNormalMeanPrecConjugate.cc b/lib/maths/CNormalMeanPrecConjugate.cc index 9865e17286..568f2a19e2 100644 --- a/lib/maths/CNormalMeanPrecConjugate.cc +++ b/lib/maths/CNormalMeanPrecConjugate.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -57,10 +58,8 @@ const double MINIMUM_GAUSSIAN_SHAPE = 100.0; namespace detail { -using TWeightStyleVec = maths_t::TWeightStyleVec; using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TDoubleDoublePr = std::pair; using TDoubleDoublePrVec = std::vector; @@ -75,9 +74,6 @@ struct SPlusWeight { //! (integrating over the prior for the normal mean and precision) and //! aggregate the results using \p aggregate. //! -//! \param weightStyles Controls the interpretation of the weights that -//! are associated with each sample. See maths_t::ESampleWeightStyle for -//! more details. //! \param samples The weighted samples. //! \param weights The weights of each sample in \p samples. //! \param func The function to evaluate. @@ -92,9 +88,8 @@ struct SPlusWeight { //! \param precision The precision of the conditional mean prior. //! \param result Filled in with the aggregation of results of \p func. 
template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, FUNC func, AGGREGATOR aggregate, bool isNonInformative, @@ -132,7 +127,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // of this distribution.) for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i]; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); if (!CMathsFuncs::isFinite(n)) { LOG_ERROR(<< "Bad count weight " << n); return false; @@ -147,11 +142,10 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // and the error function is significantly cheaper to compute. for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); + double countVarianceScale = maths_t::countVarianceScale(weights[i]); double x = seasonalScale != 1.0 ? predictionMean + (samples[i] - predictionMean) / seasonalScale @@ -163,7 +157,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, double deviation = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / shape); - boost::math::normal_distribution<> normal(mean, deviation); + boost::math::normal normal(mean, deviation); result = aggregate(result, func(normal, x + offset), n); } } else { @@ -174,14 +168,13 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // // and using the student's t distribution with 2*a degrees of freedom. - boost::math::students_t_distribution<> students(2.0 * shape); + boost::math::students_t students(2.0 * shape); for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); + double countVarianceScale = maths_t::countVarianceScale(weights[i]); double x = seasonalScale != 1.0 ? 
predictionMean + (samples[i] - predictionMean) / seasonalScale @@ -216,29 +209,27 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, template class CEvaluateOnSamples : core::CNonCopyable { public: - CEvaluateOnSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CEvaluateOnSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double mean, double precision, double shape, double rate, double predictionMean) - : m_WeightStyles(weightStyles), m_Samples(samples), m_Weights(weights), + : m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate), m_PredictionMean(predictionMean) {} bool operator()(double x, double& result) const { return evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, - x, m_Shape, m_Rate, m_Mean, m_Precision, m_PredictionMean, result); + m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative, x, + m_Shape, m_Rate, m_Mean, m_Precision, m_PredictionMean, result); } private: - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Mean; double m_Precision; @@ -255,26 +246,26 @@ class CEvaluateOnSamples : core::CNonCopyable { class CProbabilityOfLessLikelySamples : core::CNonCopyable { public: CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, bool isNonInformative, double mean, double precision, double shape, double rate, double predictionMean) - : m_Calculation(calculation), m_WeightStyles(weightStyles), - m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative), - m_Mean(mean), m_Precision(precision), m_Shape(shape), m_Rate(rate), + : m_Calculation(calculation), m_Samples(samples), m_Weights(weights), + m_IsNonInformative(isNonInformative), m_Mean(mean), + m_Precision(precision), m_Shape(shape), m_Rate(rate), m_PredictionMean(predictionMean), m_Tail(0) {} bool operator()(double x, double& result) const { + CJointProbabilityOfLessLikelySamples probability; maths_t::ETail tail = maths_t::E_UndeterminedTail; if (!evaluateFunctionOnJointDistribution( - m_WeightStyles, m_Samples, m_Weights, + m_Samples, m_Weights, boost::bind(CTools::CProbabilityOfLessLikelySample(m_Calculation), _1, _2, boost::ref(tail)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative, @@ -293,9 +284,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { private: maths_t::EProbabilityCalculation m_Calculation; - const TWeightStyleVec& m_WeightStyles; const TDouble1Vec& m_Samples; - const TDouble4Vec1Vec& m_Weights; + const TDoubleWeightsAry1Vec& m_Weights; bool m_IsNonInformative; double m_Mean; double m_Precision; @@ -325,9 +315,8 @@ class CProbabilityOfLessLikelySamples : core::CNonCopyable { //! a and b are the prior Gamma shape and rate, respectively. 
class CLogMarginalLikelihood : core::CNonCopyable { public: - CLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + CLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double mean, double precision, double shape, @@ -337,11 +326,12 @@ class CLogMarginalLikelihood : core::CNonCopyable { m_NumberSamples(0.0), m_WeightedNumberSamples(0.0), m_SampleMean(0.0), m_SampleSquareDeviation(0.0), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) { - this->precompute(weightStyles, samples, weights, predictionMean); + this->precompute(samples, weights, predictionMean); } //! Evaluate the log marginal likelihood at the offset \p x. bool operator()(double x, double& result) const { + if (m_ErrorStatus & maths_t::E_FpFailed) { return false; } @@ -368,9 +358,8 @@ class CLogMarginalLikelihood : core::CNonCopyable { private: //! Compute all the constants in the integrand. - void precompute(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + void precompute(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double predictionMean) { m_NumberSamples = 0.0; TMeanVarAccumulator sampleMoments; @@ -378,11 +367,10 @@ class CLogMarginalLikelihood : core::CNonCopyable { try { for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double seasonalScale = std::sqrt( - maths_t::seasonalVarianceScale(weightStyles, weights[i])); - double countVarianceScale = - maths_t::countVarianceScale(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); + double seasonalScale = + std::sqrt(maths_t::seasonalVarianceScale(weights[i])); + double countVarianceScale = maths_t::countVarianceScale(weights[i]); double w = 1.0 / countVarianceScale; m_NumberSamples += n; if (seasonalScale != 1.0) { @@ -551,9 +539,8 @@ void CNormalMeanPrecConjugate::setToNonInformative(double /*offset*/, double dec *this = nonInformativePrior(this->dataType(), decayRate); } -double CNormalMeanPrecConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, - const TDouble1Vec& /*samples*/, - const TDouble4Vec1Vec& /*weights*/) { +double CNormalMeanPrecConjugate::adjustOffset(const TDouble1Vec& /*samples*/, + const TDoubleWeightsAry1Vec& /*weights*/) { return 0.0; } @@ -561,13 +548,11 @@ double CNormalMeanPrecConjugate::offset() const { return 0.0; } -void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -575,7 +560,7 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, return; } - this->CPrior::addSamples(weightStyles, samples, weights); + this->CPrior::addSamples(samples, weights); // If {x(i)} denotes the sample vector, the likelihood function is: // likelihood(x | p', m') ~ @@ -632,18 +617,12 @@ void CNormalMeanPrecConjugate::addSamples(const TWeightStyleVec& weightStyles, double numberSamples = 0.0; TMeanVarAccumulator sampleMoments; - try { - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double varianceScale = - 
maths_t::seasonalVarianceScale(weightStyles, weights[i]) * - maths_t::countVarianceScale(weightStyles, weights[i]); - numberSamples += n; - sampleMoments.add(samples[i], n / varianceScale); - } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double varianceScale = maths_t::seasonalVarianceScale(weights[i]) * + maths_t::countVarianceScale(weights[i]); + numberSamples += n; + sampleMoments.add(samples[i], n / varianceScale); } double scaledNumberSamples = CBasicStatistics::count(sampleMoments); double sampleMean = CBasicStatistics::mean(sampleMoments); @@ -734,21 +713,20 @@ void CNormalMeanPrecConjugate::propagateForwardsByTime(double time) { CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::marginalLikelihoodSupport() const { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } double CNormalMeanPrecConjugate::marginalLikelihoodMean() const { return this->isInteger() ? this->mean() - 0.5 : this->mean(); } -double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CNormalMeanPrecConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const { return this->marginalLikelihoodMean(); } -double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative() || m_GammaShape <= 1.0) { return boost::numeric::bounds::highest(); } @@ -762,13 +740,8 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe // and use the fact that X conditioned on M and P is a normal. The // first term evaluates to 1 / P and the second term 1 / p / t whence... - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); double a = m_GammaShape; double b = m_GammaRate; double t = m_GaussianPrecision; @@ -777,8 +750,7 @@ double CNormalMeanPrecConjugate::marginalLikelihoodVariance(const TWeightStyleVe CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -789,9 +761,8 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage // We use the fact that the marginal likelihood is a t-distribution. 
try { - double seasonalScale = - std::sqrt(maths_t::seasonalVarianceScale(weightStyles, weights)); - double countVarianceScale = maths_t::countVarianceScale(weightStyles, weights); + double seasonalScale = std::sqrt(maths_t::seasonalVarianceScale(weights)); + double countVarianceScale = maths_t::countVarianceScale(weights); double scaledPrecision = countVarianceScale * m_GaussianPrecision; double scaledRate = countVarianceScale * m_GammaRate; @@ -800,7 +771,7 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage double m = this->marginalLikelihoodMean(); if (m_GammaShape > MINIMUM_GAUSSIAN_SHAPE) { - boost::math::normal_distribution<> normal(m_GaussianMean, scale); + boost::math::normal normal(m_GaussianMean, scale); double x1 = boost::math::quantile(normal, (1.0 - percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0); x1 = seasonalScale != 1.0 ? m + seasonalScale * (x1 - m) : x1; @@ -810,9 +781,9 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage : x1; x2 = seasonalScale != 1.0 ? m + seasonalScale * (x2 - m) : x2; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale); - return std::make_pair(x1, x2); + return {x1, x2}; } - boost::math::students_t_distribution<> students(2.0 * m_GammaShape); + boost::math::students_t students(2.0 * m_GammaShape); double x1 = m_GaussianMean + scale * boost::math::quantile(students, (1.0 - percentage) / 2.0) - (this->isInteger() ? 0.5 : 0.0); @@ -825,7 +796,7 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage : x1; x2 = seasonalScale != 1.0 ? m + seasonalScale * (x2 - m) : x2; LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2 << ", scale = " << scale); - return std::make_pair(x1, x2); + return {x1, x2}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } @@ -834,9 +805,8 @@ CNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage } maths_t::EFloatingPointErrorStatus -CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -866,8 +836,8 @@ CNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weig } detail::CLogMarginalLikelihood logMarginalLikelihood( - weightStyles, samples, weights, m_GaussianMean, m_GaussianPrecision, - m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); + samples, weights, m_GaussianMean, m_GaussianPrecision, m_GammaShape, + m_GammaRate, this->marginalLikelihoodMean()); if (this->isInteger()) { CIntegration::logGaussLegendre( logMarginalLikelihood, 0.0, 1.0, result); @@ -962,7 +932,7 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample << ", numberSamples = " << numberSamples); try { - boost::math::normal_distribution<> normal(m_GaussianMean, std::sqrt(variance)); + boost::math::normal normal(m_GaussianMean, std::sqrt(variance)); for (std::size_t i = 1u; i < numberSamples; ++i) { double q = static_cast(i) / static_cast(numberSamples); @@ -995,7 +965,7 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample double degreesFreedom = 2.0 * m_GammaShape; try { - boost::math::students_t_distribution<> students(degreesFreedom); + boost::math::students_t students(degreesFreedom); double scale = 
std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision * m_GammaRate / m_GammaShape); @@ -1054,16 +1024,16 @@ void CNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSample } } -bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CNormalMeanPrecConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdf = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; - TMinusLogCdf minusLogCdf(weightStyles, samples, weights, this->isNonInformative(), + TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); @@ -1094,18 +1064,18 @@ bool CNormalMeanPrecConjugate::minusLogJointCdf(const TWeightStyleVec& weightSty return true; } -bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + using TMinusLogCdfComplement = detail::CEvaluateOnSamples; lowerBound = upperBound = 0.0; TMinusLogCdfComplement minusLogCdfComplement( - weightStyles, samples, weights, this->isNonInformative(), m_GaussianMean, - m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); + samples, weights, this->isNonInformative(), m_GaussianMean, m_GaussianPrecision, + m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1136,19 +1106,18 @@ bool CNormalMeanPrecConjugate::minusLogJointCdfComplement(const TWeightStyleVec& bool CNormalMeanPrecConjugate::probabilityOfLessLikelySamples( maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { + lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; detail::CProbabilityOfLessLikelySamples probability( - calculation, weightStyles, samples, weights, this->isNonInformative(), - m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate, - this->marginalLikelihoodMean()); + calculation, samples, weights, this->isNonInformative(), m_GaussianMean, + m_GaussianPrecision, m_GammaShape, m_GammaRate, this->marginalLikelihoodMean()); if (this->isInteger()) { // If the data are discrete we compute the approximate expectation @@ -1191,13 +1160,14 @@ void CNormalMeanPrecConjugate::print(const std::string& indent, std::string& res result += "non-informative"; return; } - result += "mean = " + core::CStringUtils::typeToStringPretty(this->marginalLikelihoodMean()) + - " sd = " + - core::CStringUtils::typeToStringPretty( - std::sqrt(this->marginalLikelihoodVariance())); + double mean = this->marginalLikelihoodMean(); + double sd = std::sqrt(this->marginalLikelihoodVariance()); + result += "mean = " + core::CStringUtils::typeToStringPretty(mean); + result += " sd = " + core::CStringUtils::typeToStringPretty(sd); } std::string CNormalMeanPrecConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper 
and effectively 0 everywhere. return std::string(); @@ -1211,7 +1181,7 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const { boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); double precision = m_GaussianPrecision * this->precision(); - boost::math::normal_distribution<> gaussian(m_GaussianMean, 1.0 / std::sqrt(precision)); + boost::math::normal gaussian(m_GaussianMean, 1.0 / std::sqrt(precision)); double xStart = boost::math::quantile(gamma, (1.0 - RANGE) / 2.0); double xEnd = boost::math::quantile(gamma, (1.0 + RANGE) / 2.0); @@ -1241,7 +1211,7 @@ std::string CNormalMeanPrecConjugate::printJointDensityFunction() const { y = yStart; for (unsigned int j = 0u; j < POINTS; ++j, y += yIncrement) { double conditionalPrecision = m_GaussianPrecision * x; - boost::math::normal_distribution<> conditionalGaussian( + boost::math::normal conditionalGaussian( m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision)); pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y)) @@ -1300,8 +1270,8 @@ double CNormalMeanPrecConjugate::precision() const { CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } // Compute the symmetric confidence interval around the median of the @@ -1325,7 +1295,7 @@ CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const { double lowerPercentile = 0.5 * (1.0 - percentage); double upperPercentile = 0.5 * (1.0 + percentage); - boost::math::students_t_distribution<> students(2.0 * m_GammaShape); + boost::math::students_t students(2.0 * m_GammaShape); double xLower = boost::math::quantile(students, lowerPercentile); xLower = m_GaussianMean + @@ -1334,14 +1304,14 @@ CNormalMeanPrecConjugate::confidenceIntervalMean(double percentage) const { xUpper = m_GaussianMean + xUpper / std::sqrt(m_GaussianPrecision * m_GammaShape / m_GammaRate); - return std::make_pair(xLower, xUpper); + return {xLower, xUpper}; } CNormalMeanPrecConjugate::TDoubleDoublePr CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const { if (this->isNonInformative()) { - return std::make_pair(boost::numeric::bounds::lowest(), - boost::numeric::bounds::highest()); + return {boost::numeric::bounds::lowest(), + boost::numeric::bounds::highest()}; } percentage /= 100.0; @@ -1351,8 +1321,8 @@ CNormalMeanPrecConjugate::confidenceIntervalPrecision(double percentage) const { // The marginal prior distribution for the precision is gamma. 
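// ---------------------------------------------------------------------------
// Aside, outside the patch: the shorter distribution names adopted throughout
// (boost::math::normal, students_t, negative_binomial) are Boost.Math's
// double-precision convenience typedefs, so the renaming changes no behaviour;
// gamma_distribution<> keeps its long form here, presumably because a "gamma"
// typedef would clash with the gamma function. A self-contained check:
#include <boost/math/distributions/normal.hpp>
#include <type_traits>
static_assert(std::is_same<boost::math::normal,
                           boost::math::normal_distribution<double>>::value,
              "boost::math::normal is normal_distribution<double>");
// ---------------------------------------------------------------------------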
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile), - boost::math::quantile(gamma, upperPercentile)); + return {boost::math::quantile(gamma, lowerPercentile), + boost::math::quantile(gamma, upperPercentile)}; } bool CNormalMeanPrecConjugate::equalTolerance(const CNormalMeanPrecConjugate& rhs, diff --git a/lib/maths/COneOfNPrior.cc b/lib/maths/COneOfNPrior.cc index 583db82405..7720e31b83 100644 --- a/lib/maths/COneOfNPrior.cc +++ b/lib/maths/COneOfNPrior.cc @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -229,14 +230,13 @@ bool COneOfNPrior::needsOffset() const { return false; } -double COneOfNPrior::adjustOffset(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +double COneOfNPrior::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { TMeanAccumulator result; TDouble5Vec penalties; for (auto& model : m_Models) { - double penalty = model.second->adjustOffset(weightStyles, samples, weights); + double penalty = model.second->adjustOffset(samples, weights); penalties.push_back(penalty); result.add(penalty, model.first); } @@ -263,13 +263,11 @@ double COneOfNPrior::offset() const { return offset; } -void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void COneOfNPrior::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -277,10 +275,10 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); + this->adjustOffset(samples, weights); double penalty = CTools::fastLog(this->numberSamples()); - this->CPrior::addSamples(weightStyles, samples, weights); + this->CPrior::addSamples(samples, weights); penalty = (penalty - CTools::fastLog(this->numberSamples())) / 2.0; // For this 1-of-n model we assume that all the data come from one @@ -347,8 +345,7 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, // Update the weights with the marginal likelihoods. double logLikelihood = 0.0; maths_t::EFloatingPointErrorStatus status = - use ? model.second->jointLogMarginalLikelihood(weightStyles, samples, - weights, logLikelihood) + use ? model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood) : maths_t::E_FpOverflowed; if (status & maths_t::E_FpFailed) { @@ -366,7 +363,7 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, } // Update the component prior distribution. 
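// ---------------------------------------------------------------------------
// Usage sketch, outside the patch: how calling code drives any prior through
// the new two-argument update path. The prior reference, helper name and
// sample values are hypothetical; the calls and the maths_t::countWeight
// builder are the ones used in this file, and the weights need no
// accompanying style vector.
#include <maths/CPrior.h> // assumed header for ml::maths::CPrior

void updatePrior(ml::maths::CPrior& prior) {
    using namespace ml;
    // One full weights array per sample; countWeight carries the given count
    // and leaves the other weight styles at their defaults.
    maths_t::TDoubleWeightsAry1Vec weights{maths_t::countWeight(1.0),
                                           maths_t::countWeight(2.0)};
    prior.addSamples({10.0, 11.5}, weights);
    prior.propagateForwardsByTime(1.0);
}
// ---------------------------------------------------------------------------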
- model.second->addSamples(weightStyles, samples, weights); + model.second->addSamples(samples, weights); used.push_back(use); uses.push_back(model.second->participatesInModelSelection()); @@ -385,7 +382,7 @@ void COneOfNPrior::addSamples(const TWeightStyleVec& weightStyles, double n = 0.0; try { for (const auto& weight : weights) { - n += maths_t::count(weightStyles, weight); + n += maths_t::count(weight); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to add samples: " << e.what()); @@ -443,6 +440,7 @@ void COneOfNPrior::propagateForwardsByTime(double time) { } COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const { + TDoubleDoublePr result(MINUS_INF, INF); // We define this is as the intersection of the component model supports. @@ -458,6 +456,7 @@ COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const { } double COneOfNPrior::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return this->medianModelMean(); } @@ -481,6 +480,7 @@ double COneOfNPrior::marginalLikelihoodMean() const { } double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const { + if (this->isNonInformative()) { return this->medianModelMean(); } @@ -499,24 +499,24 @@ double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const { return result / Z; } -double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double COneOfNPrior::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const { + // We approximate this as the weighted average of the component // model modes. // Declared outside the loop to minimize the number of times // they are created. TDouble1Vec sample(1); - TDouble4Vec1Vec weight(1, weights); + TDoubleWeightsAry1Vec weight(1, weights); TMeanAccumulator mode; for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double wi = model.first; - double mi = model.second->marginalLikelihoodMode(weightStyles, weights); + double mi = model.second->marginalLikelihoodMode(weights); double logLikelihood; sample[0] = mi; - model.second->jointLogMarginalLikelihood(weightStyles, sample, weight, logLikelihood); + model.second->jointLogMarginalLikelihood(sample, weight, logLikelihood); mode.add(mi, wi * std::exp(logLikelihood)); } } @@ -526,8 +526,8 @@ double COneOfNPrior::marginalLikelihoodMode(const TWeightStyleVec& weightStyles, return CTools::truncate(result, support.first, support.second); } -double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double COneOfNPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return INF; } @@ -543,7 +543,7 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightSty for (const auto& model : m_Models) { double wi = model.first; if (wi > MINIMUM_SIGNIFICANT_WEIGHT) { - result += wi * model.second->marginalLikelihoodVariance(weightStyles, weights); + result += wi * model.second->marginalLikelihoodVariance(weights); Z += wi; } } @@ -552,8 +552,8 @@ double COneOfNPrior::marginalLikelihoodVariance(const TWeightStyleVec& weightSty COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { + const TDoubleWeightsAry& weights) const { + // We approximate this as the weighted sum of the component model // intervals. 
To compute the weights we expand all component model // marginal likelihoods about a reasonable estimate for the true @@ -579,22 +579,22 @@ COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage, for (const auto& model : m_Models) { double weight = model.first; if (weight >= MAXIMUM_RELATIVE_ERROR) { - TDoubleDoublePr interval = model.second->marginalLikelihoodConfidenceInterval( - percentage, weightStyles, weights); + TDoubleDoublePr interval = + model.second->marginalLikelihoodConfidenceInterval(percentage, weights); x1.add(interval.first, weight); x2.add(interval.second, weight); } } LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2); - return std::make_pair(CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)); + return {CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)}; } maths_t::EFloatingPointErrorStatus -COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +COneOfNPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { + result = 0.0; if (samples.empty()) { @@ -627,8 +627,8 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, for (const auto& model : m_Models) { if (model.second->participatesInModelSelection()) { double logLikelihood; - maths_t::EFloatingPointErrorStatus status = model.second->jointLogMarginalLikelihood( - weightStyles, samples, weights, logLikelihood); + maths_t::EFloatingPointErrorStatus status = + model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood); if (status & maths_t::E_FpFailed) { return status; } @@ -678,6 +678,7 @@ COneOfNPrior::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, TDouble1Vec& samples) const { + samples.clear(); if (numberSamples == 0 || this->isNonInformative()) { @@ -721,11 +722,11 @@ void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples, } bool COneOfNPrior::minusLogJointCdfImpl(bool complement, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { + lowerBound = upperBound = 0.0; if (samples.empty()) { @@ -765,13 +766,11 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, double li = 0.0; double ui = 0.0; - if (complement && !model.minusLogJointCdfComplement(weightStyles, samples, - weights, li, ui)) { + if (complement && !model.minusLogJointCdfComplement(samples, weights, li, ui)) { LOG_ERROR(<< "Failed computing c.d.f. complement for " << core::CContainerPrinter::print(samples)); return false; - } else if (!complement && - !model.minusLogJointCdf(weightStyles, samples, weights, li, ui)) { + } else if (!complement && !model.minusLogJointCdf(samples, weights, li, ui)) { LOG_ERROR(<< "Failed computing c.d.f. 
for " << core::CContainerPrinter::print(samples)); return false; @@ -818,31 +817,29 @@ bool COneOfNPrior::minusLogJointCdfImpl(bool complement, return true; } -bool COneOfNPrior::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool COneOfNPrior::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return this->minusLogJointCdfImpl(false, // complement - weightStyles, samples, weights, lowerBound, upperBound); + return this->minusLogJointCdfImpl(false /*complement*/, samples, weights, + lowerBound, upperBound); } -bool COneOfNPrior::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool COneOfNPrior::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { - return this->minusLogJointCdfImpl(true, // complement - weightStyles, samples, weights, lowerBound, upperBound); + return this->minusLogJointCdfImpl(true /*complement*/, samples, weights, + lowerBound, upperBound); } bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { + lowerBound = upperBound = 0.0; tail = maths_t::E_UndeterminedTail; @@ -850,7 +847,6 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati LOG_ERROR(<< "Can't compute distribution for empty sample set"); return false; } - if (this->isNonInformative()) { lowerBound = upperBound = 1.0; return true; @@ -885,8 +881,7 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati double modelLowerBound, modelUpperBound; maths_t::ETail modelTail; - if (!model.probabilityOfLessLikelySamples(calculation, weightStyles, - samples, weights, modelLowerBound, + if (!model.probabilityOfLessLikelySamples(calculation, samples, weights, modelLowerBound, modelUpperBound, modelTail)) { // Logging handled at a lower level. 
return false; @@ -897,7 +892,7 @@ bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculati lowerBound += weight * modelLowerBound; upperBound += weight * modelUpperBound; - tail_.add(TDoubleTailPr(weight * (modelLowerBound + modelUpperBound), modelTail)); + tail_.add({weight * (modelLowerBound + modelUpperBound), modelTail}); } if (!(lowerBound >= 0.0 && lowerBound <= 1.001) || @@ -932,6 +927,7 @@ bool COneOfNPrior::isNonInformative() const { } void COneOfNPrior::print(const std::string& indent, std::string& result) const { + result += core_t::LINE_ENDING + indent + "one-of-n"; if (this->isNonInformative()) { result += " non-informative"; @@ -994,6 +990,7 @@ COneOfNPrior::TDoubleVec COneOfNPrior::weights() const { } COneOfNPrior::TDoubleVec COneOfNPrior::logWeights() const { + TDoubleVec result; result.reserve(m_Models.size()); @@ -1052,6 +1049,7 @@ bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams& } COneOfNPrior::TDoubleSizePr5Vec COneOfNPrior::normalizedLogWeights() const { + TDoubleSizePr5Vec result; double Z = 0.0; for (std::size_t i = 0u; i < m_Models.size(); ++i) { diff --git a/lib/maths/CPoissonMeanConjugate.cc b/lib/maths/CPoissonMeanConjugate.cc index 593104aa4a..9fbe98f698 100644 --- a/lib/maths/CPoissonMeanConjugate.cc +++ b/lib/maths/CPoissonMeanConjugate.cc @@ -64,9 +64,7 @@ struct SStaticCast { namespace detail { using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; -using TWeightStyleVec = maths_t::TWeightStyleVec; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; //! Adds "weight" x "right operand" to the "left operand". struct SPlusWeight { @@ -79,9 +77,6 @@ struct SPlusWeight { //! (integrating over the prior for the Poisson rate) and aggregate the //! results using \p aggregate. //! -//! \param[in] weightStyles Controls the interpretation of the weight(s) that -//! are associated with each sample. See maths_t::ESampleWeightStyle for more -//! details. //! \param[in] samples The weighted samples. //! \param[in] func The function to evaluate. //! \param[in] aggregate The function to aggregate the results of \p func. @@ -91,9 +86,8 @@ struct SPlusWeight { //! \param[in] rate The rate of the rate prior. //! \param[out] result Filled in with the aggregation of results of \p func. template -bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, FUNC func, AGGREGATOR aggregate, double offset, @@ -130,7 +124,7 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // of this distribution.) for (std::size_t i = 0u; i < samples.size(); ++i) { double x = samples[i] + offset; - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); result = aggregate(result, func(CTools::SImproperDistribution(), x), n); } } else { @@ -148,18 +142,18 @@ bool evaluateFunctionOnJointDistribution(const TWeightStyleVec& weightStyles, // and the error function is significantly cheaper to compute. 
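// (Aside, outside the patch, for reference: with a Gamma(shape, rate) prior on
// the Poisson rate the marginal distribution of a count X is negative binomial
// with r = shape and p = rate / (rate + 1), so
//   E[X]   = shape / rate                          (the "mean" below)
//   Var[X] = shape / rate * (rate + 1) / rate      (= (rate + 1) / rate * mean)
// and these are the moments matched by the normal used on the large-mean
// branch below.)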
for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::count(weightStyles, weights[i]); + double n = maths_t::count(weights[i]); double x = samples[i] + offset; double mean = shape / rate; if (mean > MINIMUM_GAUSSIAN_MEAN) { double deviation = std::sqrt((rate + 1.0) / rate * mean); - boost::math::normal_distribution<> normal(mean, deviation); + boost::math::normal normal(mean, deviation); result = aggregate(result, func(normal, x), n); } else { double r = shape; double p = rate / (rate + 1.0); - boost::math::negative_binomial_distribution<> negativeBinomial(r, p); + boost::math::negative_binomial negativeBinomial(r, p); result = aggregate(result, func(negativeBinomial, x), n); } } @@ -246,9 +240,8 @@ bool CPoissonMeanConjugate::needsOffset() const { return true; } -double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles*/, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& /*weights*/) { +double CPoissonMeanConjugate::adjustOffset(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& /*weights*/) { if (samples.empty() || CMathsFuncs::beginFinite(samples) == CMathsFuncs::endFinite(samples)) { return 0.0; @@ -273,16 +266,15 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles return 0.0; } - TWeightStyleVec weightStyle(1, maths_t::E_SampleCountWeight); double offset = OFFSET_MARGIN - minimumSample; TDouble1Vec resamples; this->sampleMarginalLikelihood(ADJUST_OFFSET_SAMPLE_SIZE, resamples); double weight = this->numberSamples() / static_cast(resamples.size()); - TDouble4Vec1Vec weights(resamples.size(), TDouble4Vec(1, weight)); + TDoubleWeightsAry1Vec weights(resamples.size(), maths_t::countWeight(weight)); double before = 0.0; if (!resamples.empty()) { - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, weights, before); + this->jointLogMarginalLikelihood(resamples, weights, before); } // Reset the parameters. 
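// (Aside, outside the patch: the resamples stand in for all the data the prior
// has already absorbed, so each one is re-added with an equal share of the
// total effective count, i.e. countWeight(numberSamples() / resamples.size()).
// For example, with numberSamples() == 1000 and 50 resamples, each resample is
// re-added with a count weight of 20. The change in their log-likelihood after
// re-seeding, capped above at zero, becomes the model selection penalty.)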
@@ -302,10 +294,10 @@ double CPoissonMeanConjugate::adjustOffset(const TWeightStyleVec& /*weightStyles LOG_TRACE(<< "resamples = " << core::CContainerPrinter::print(resamples) << ", weight = " << weight << ", offset = " << m_Offset); - this->addSamples(weightStyle, resamples, weights); + this->addSamples(resamples, weights); double after; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, weights, after); + this->jointLogMarginalLikelihood(resamples, weights, after); return std::min(after - before, 0.0); } @@ -314,13 +306,11 @@ double CPoissonMeanConjugate::offset() const { return m_Offset; } -void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { +void CPoissonMeanConjugate::addSamples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { if (samples.empty()) { return; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -328,8 +318,8 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, return; } - this->adjustOffset(weightStyles, samples, weights); - this->CPrior::addSamples(weightStyles, samples, weights); + this->adjustOffset(samples, weights); + this->CPrior::addSamples(samples, weights); // The update of the posterior with n independent samples of the // Poisson distribution comes from: @@ -351,20 +341,15 @@ void CPoissonMeanConjugate::addSamples(const TWeightStyleVec& weightStyles, double numberSamples = 0.0; double sampleSum = 0.0; - try { - for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); - double x = samples[i] + m_Offset; - if (!CMathsFuncs::isFinite(x) || x < 0.0) { - LOG_ERROR(<< "Discarding " << x << " it's not Poisson"); - continue; - } - numberSamples += n; - sampleSum += n * x; + for (std::size_t i = 0u; i < samples.size(); ++i) { + double n = maths_t::countForUpdate(weights[i]); + double x = samples[i] + m_Offset; + if (!CMathsFuncs::isFinite(x) || x < 0.0) { + LOG_ERROR(<< "Discarding " << x << " it's not Poisson"); + continue; } - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to update likelihood: " << e.what()); - return; + numberSamples += n; + sampleSum += n * x; } m_Shape += sampleSum; @@ -380,7 +365,6 @@ void CPoissonMeanConjugate::propagateForwardsByTime(double time) { LOG_ERROR(<< "Bad propagation time " << time); return; } - if (this->isNonInformative()) { // There is nothing to be done. 
return; @@ -412,10 +396,11 @@ void CPoissonMeanConjugate::propagateForwardsByTime(double time) { } CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodSupport() const { - return std::make_pair(-m_Offset, boost::numeric::bounds::highest()); + return {-m_Offset, boost::numeric::bounds::highest()}; } double CPoissonMeanConjugate::marginalLikelihoodMean() const { + if (this->isNonInformative()) { return -m_Offset; } @@ -427,8 +412,8 @@ double CPoissonMeanConjugate::marginalLikelihoodMean() const { return this->priorMean() - m_Offset; } -double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { +double CPoissonMeanConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& /*weights*/) const { + if (this->isNonInformative()) { return -m_Offset; } @@ -446,7 +431,7 @@ double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we try { double r = m_Shape; double p = m_Rate / (m_Rate + 1.0); - boost::math::negative_binomial_distribution<> negativeBinomial(r, p); + boost::math::negative_binomial negativeBinomial(r, p); return boost::math::mode(negativeBinomial) - m_Offset; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what() @@ -456,8 +441,8 @@ double CPoissonMeanConjugate::marginalLikelihoodMode(const TWeightStyleVec& /*we return -m_Offset; } -double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { +double CPoissonMeanConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const { + if (this->isNonInformative()) { return boost::numeric::bounds::highest(); } @@ -466,20 +451,14 @@ double CPoissonMeanConjugate::marginalLikelihoodVariance(const TWeightStyleVec& // = E_{R}[ R + (R - a/b)^2 ] // = "prior mean" + "prior variance" - double varianceScale = 1.0; - try { - varianceScale = maths_t::seasonalVarianceScale(weightStyles, weights) * - maths_t::countVarianceScale(weightStyles, weights); - } catch (const std::exception& e) { - LOG_ERROR(<< "Failed to get variance scale: " << e.what()); - } + double varianceScale = maths_t::seasonalVarianceScale(weights) * + maths_t::countVarianceScale(weights); return varianceScale * (this->priorMean() + this->priorVariance()); } CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, - const TWeightStyleVec& /*weightStyles*/, - const TDouble4Vec& /*weights*/) const { + const TDoubleWeightsAry& /*weights*/) const { if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -493,12 +472,12 @@ CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, try { double r = m_Shape; double p = m_Rate / (m_Rate + 1.0); - boost::math::negative_binomial_distribution<> negativeBinomial(r, p); + boost::math::negative_binomial negativeBinomial(r, p); double x1 = boost::math::quantile(negativeBinomial, (1.0 - percentage) / 2.0) - m_Offset; double x2 = percentage > 0.0 ? 
boost::math::quantile(negativeBinomial, (1.0 + percentage) / 2.0) - m_Offset : x1; - return std::make_pair(x1, x2); + return {x1, x2}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()); } @@ -507,9 +486,8 @@ CPoissonMeanConjugate::marginalLikelihoodConfidenceInterval(double percentage, } maths_t::EFloatingPointErrorStatus -CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +CPoissonMeanConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& result) const { result = 0.0; @@ -517,7 +495,6 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS LOG_ERROR(<< "Can't compute likelihood for empty sample set"); return maths_t::E_FpFailed; } - if (samples.size() != weights.size()) { LOG_ERROR(<< "Mismatch in samples '" << core::CContainerPrinter::print(samples) << "' and weights '" @@ -561,7 +538,7 @@ CPoissonMeanConjugate::jointLogMarginalLikelihood(const TWeightStyleVec& weightS double sampleLogFactorialSum = 0.0; for (std::size_t i = 0u; i < samples.size(); ++i) { - double n = maths_t::countForUpdate(weightStyles, weights[i]); + double n = maths_t::countForUpdate(weights[i]); double x = samples[i] + m_Offset; if (x < 0.0) { // Technically, the marginal likelihood is zero here @@ -666,7 +643,7 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, LOG_TRACE(<< "mean = " << mean << ", variance = " << variance); try { - boost::math::normal_distribution<> normal(mean, std::sqrt(variance)); + boost::math::normal normal(mean, std::sqrt(variance)); for (std::size_t i = 1u; i < numberSamples; ++i) { double q = static_cast(i) / static_cast(numberSamples); @@ -756,16 +733,15 @@ void CPoissonMeanConjugate::sampleMarginalLikelihood(std::size_t numberSamples, } } -bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CPoissonMeanConjugate::minusLogJointCdf(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { lowerBound = upperBound = 0.0; double value; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, samples, weights, CTools::SMinusLogCdf(), detail::SPlusWeight(), + samples, weights, CTools::SMinusLogCdf(), detail::SPlusWeight(), m_Offset, this->isNonInformative(), m_Shape, m_Rate, value)) { LOG_ERROR(<< "Failed computing c.d.f. for " << core::CContainerPrinter::print(samples)); @@ -776,18 +752,16 @@ bool CPoissonMeanConjugate::minusLogJointCdf(const TWeightStyleVec& weightStyles return true; } -bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound) const { lowerBound = upperBound = 0.0; double value; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, samples, weights, CTools::SMinusLogCdfComplement(), - detail::SPlusWeight(), m_Offset, this->isNonInformative(), m_Shape, - m_Rate, value)) { + samples, weights, CTools::SMinusLogCdfComplement(), detail::SPlusWeight(), + m_Offset, this->isNonInformative(), m_Shape, m_Rate, value)) { LOG_ERROR(<< "Failed computing c.d.f. 
complement for " << core::CContainerPrinter::print(samples)); return false; @@ -798,9 +772,8 @@ bool CPoissonMeanConjugate::minusLogJointCdfComplement(const TWeightStyleVec& we } bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, - const TWeightStyleVec& weightStyles, const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double& lowerBound, double& upperBound, maths_t::ETail& tail) const { @@ -812,7 +785,7 @@ bool CPoissonMeanConjugate::probabilityOfLessLikelySamples(maths_t::EProbability CJointProbabilityOfLessLikelySamples probability; if (!detail::evaluateFunctionOnJointDistribution( - weightStyles, samples, weights, + samples, weights, boost::bind(CTools::CProbabilityOfLessLikelySample(calculation), _1, _2, boost::ref(tail_)), CJointProbabilityOfLessLikelySamples::SAddProbability(), m_Offset, @@ -846,6 +819,7 @@ void CPoissonMeanConjugate::print(const std::string& indent, std::string& result } std::string CPoissonMeanConjugate::printJointDensityFunction() const { + if (this->isNonInformative()) { // The non-informative prior is improper and effectively 0 everywhere. return std::string(); @@ -913,6 +887,7 @@ void CPoissonMeanConjugate::acceptPersistInserter(core::CStatePersistInserter& i } double CPoissonMeanConjugate::priorMean() const { + if (this->isNonInformative()) { return 0.0; } @@ -946,6 +921,7 @@ double CPoissonMeanConjugate::priorVariance() const { CPoissonMeanConjugate::TDoubleDoublePr CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { + if (this->isNonInformative()) { return this->marginalLikelihoodSupport(); } @@ -962,8 +938,8 @@ CPoissonMeanConjugate::meanConfidenceInterval(double percentage) const { try { boost::math::gamma_distribution<> gamma(m_Shape, 1.0 / m_Rate); - return std::make_pair(boost::math::quantile(gamma, lowerPercentile) - m_Offset, - boost::math::quantile(gamma, upperPercentile) - m_Offset); + return {boost::math::quantile(gamma, lowerPercentile) - m_Offset, + boost::math::quantile(gamma, upperPercentile) - m_Offset}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed to compute mean confidence interval: " << e.what() << ", prior shape = " << m_Shape << ", prior rate = " << m_Rate); diff --git a/lib/maths/CPrior.cc b/lib/maths/CPrior.cc index b56c3d9d7c..7ca24eeb99 100644 --- a/lib/maths/CPrior.cc +++ b/lib/maths/CPrior.cc @@ -102,13 +102,11 @@ double CPrior::offsetMargin() const { return 0.0; } -void CPrior::addSamples(const TWeightStyleVec& weightStyles, - const TDouble1Vec& /*samples*/, - const TDouble4Vec1Vec& weights) { +void CPrior::addSamples(const TDouble1Vec& /*samples*/, const TDoubleWeightsAry1Vec& weights) { double n = 0.0; try { for (const auto& weight : weights) { - n += maths_t::countForUpdate(weightStyles, weight); + n += maths_t::countForUpdate(weight); } } catch (const std::exception& e) { LOG_ERROR(<< "Failed to extract sample counts: " << e.what()); @@ -120,9 +118,8 @@ double CPrior::nearestMarginalLikelihoodMean(double /*value*/) const { return this->marginalLikelihoodMean(); } -CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TWeightStyleVec& weightStyles, - const TDouble4Vec& weights) const { - return TDouble1Vec{this->marginalLikelihoodMode(weightStyles, weights)}; +CPrior::TDouble1Vec CPrior::marginalLikelihoodModes(const TDoubleWeightsAry& weights) const { + return TDouble1Vec{this->marginalLikelihoodMode(weights)}; } std::string CPrior::print() const { @@ -172,8 +169,8 @@ CPrior::SPlot 
CPrior::marginalLikelihoodPlot(unsigned int numberPoints, double w for (auto x : plot.s_Abscissa) { double likelihood; - maths_t::EFloatingPointErrorStatus status = this->jointLogMarginalLikelihood( - CConstantWeights::COUNT, {x}, CConstantWeights::SINGLE_UNIT, likelihood); + maths_t::EFloatingPointErrorStatus status = + this->jointLogMarginalLikelihood({x}, TWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { // Ignore point. } else if (status & maths_t::E_FpOverflowed) { @@ -210,7 +207,8 @@ double CPrior::unmarginalizedParameters() const { void CPrior::adjustOffsetResamples(double minimumSample, TDouble1Vec& resamples, - TDouble4Vec1Vec& resamplesWeights) const { + TDoubleWeightsAry1Vec& resamplesWeights) const { + this->sampleMarginalLikelihood(ADJUST_OFFSET_SAMPLE_SIZE, resamples); std::size_t n = resamples.size(); resamples.erase(std::remove_if(resamples.begin(), resamples.end(), @@ -227,13 +225,12 @@ void CPrior::adjustOffsetResamples(double minimumSample, double resamplesWeight = 1.0; if (n > 0) { resamplesWeight = this->numberSamples() / static_cast(n); - resamplesWeights.resize(n, TDouble4Vec(1, resamplesWeight)); + resamplesWeights.resize(n, maths_t::countWeight(resamplesWeight)); } } -double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights, +double CPrior::adjustOffsetWithCost(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights, COffsetCost& cost, CApplyOffset& apply) { if (samples.empty() || @@ -264,7 +261,7 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, double offset = margin - minimumSample; offset *= (offset < 0.0 ? (1.0 - EPS) : (1.0 + EPS)); - cost.samples(weightStyles, samples, weights); + cost.samples(samples, weights); cost.resample(minimumSample); apply.resample(minimumSample); @@ -274,12 +271,11 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, } TDouble1Vec resamples; - TDouble4Vec1Vec resamplesWeights; + TDoubleWeightsAry1Vec resamplesWeights; this->adjustOffsetResamples(minimumSample, resamples, resamplesWeights); double before; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, - resamplesWeights, before); + this->jointLogMarginalLikelihood(resamples, resamplesWeights, before); double maximumSample = *std::max_element(samples.begin(), samples.end()); double range = resamples.empty() @@ -305,8 +301,7 @@ double CPrior::adjustOffsetWithCost(const TWeightStyleVec& weightStyles, apply(offset); double after; - this->jointLogMarginalLikelihood(CConstantWeights::COUNT, resamples, - resamplesWeights, after); + this->jointLogMarginalLikelihood(resamples, resamplesWeights, after); return std::min(after - before, 0.0); } @@ -338,9 +333,8 @@ bool CPrior::CModelFilter::operator()(EPrior model) const { ////////// CPrior::CLogMarginalLikelihood Implementation ////////// CPrior::CLogMarginalLikelihood::CLogMarginalLikelihood(const CPrior& prior, - const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) - : m_Prior(&prior), m_WeightStyles(&weightStyles), m_Weights(&weights), m_X(1) { + const TDoubleWeightsAry1Vec& weights) + : m_Prior(&prior), m_Weights(&weights), m_X(1) { } double CPrior::CLogMarginalLikelihood::operator()(double x) const { @@ -354,21 +348,17 @@ double CPrior::CLogMarginalLikelihood::operator()(double x) const { bool CPrior::CLogMarginalLikelihood::operator()(double x, double& result) const { m_X[0] = x; - return 
!(m_Prior->jointLogMarginalLikelihood(*m_WeightStyles, m_X, *m_Weights, result) & - maths_t::E_FpFailed); + return !(m_Prior->jointLogMarginalLikelihood(m_X, *m_Weights, result) & maths_t::E_FpFailed); } ////////// CPrior::COffsetParameters Implementation ////////// CPrior::COffsetParameters::COffsetParameters(CPrior& prior) - : m_Prior(&prior), m_WeightStyles(nullptr), m_Samples(nullptr), - m_Weights(nullptr), m_Resamples(0), m_ResamplesWeights(0) { + : m_Prior(&prior), m_Samples(nullptr), m_Weights(nullptr) { } -void CPrior::COffsetParameters::samples(const maths_t::TWeightStyleVec& weightStyles, - const TDouble1Vec& samples, - const TDouble4Vec1Vec& weights) { - m_WeightStyles = &weightStyles; +void CPrior::COffsetParameters::samples(const TDouble1Vec& samples, + const TDoubleWeightsAry1Vec& weights) { m_Samples = &samples; m_Weights = &weights; } @@ -381,15 +371,11 @@ CPrior& CPrior::COffsetParameters::prior() const { return *m_Prior; } -const maths_t::TWeightStyleVec& CPrior::COffsetParameters::weightStyles() const { - return *m_WeightStyles; -} - const CPrior::TDouble1Vec& CPrior::COffsetParameters::samples() const { return *m_Samples; } -const CPrior::TDouble4Vec1Vec& CPrior::COffsetParameters::weights() const { +const CPrior::TDoubleWeightsAry1Vec& CPrior::COffsetParameters::weights() const { return *m_Weights; } @@ -397,7 +383,7 @@ const CPrior::TDouble1Vec& CPrior::COffsetParameters::resamples() const { return m_Resamples; } -const CPrior::TDouble4Vec1Vec& CPrior::COffsetParameters::resamplesWeights() const { +const CPrior::TDoubleWeightsAry1Vec& CPrior::COffsetParameters::resamplesWeights() const { return m_ResamplesWeights; } @@ -413,8 +399,8 @@ double CPrior::COffsetCost::operator()(double offset) const { void CPrior::COffsetCost::resetPriors(double offset) const { this->prior().setToNonInformative(offset, this->prior().decayRate()); - this->prior().addSamples(TWeights::COUNT, this->resamples(), this->resamplesWeights()); - this->prior().addSamples(this->weightStyles(), this->samples(), this->weights()); + this->prior().addSamples(this->resamples(), this->resamplesWeights()); + this->prior().addSamples(this->samples(), this->weights()); } double CPrior::COffsetCost::computeCost(double offset) const { @@ -422,7 +408,7 @@ double CPrior::COffsetCost::computeCost(double offset) const { maths_t::EFloatingPointErrorStatus status; if (this->resamples().size() > 0) { status = this->prior().jointLogMarginalLikelihood( - TWeights::COUNT, this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); + this->resamples(), this->resamplesWeights(), resamplesLogLikelihood); if (status != maths_t::E_FpNoErrors) { LOG_ERROR(<< "Failed evaluating log-likelihood at " << offset << " for samples " << core::CContainerPrinter::print(this->resamples()) << " and weights " @@ -432,7 +418,7 @@ double CPrior::COffsetCost::computeCost(double offset) const { } double samplesLogLikelihood; status = this->prior().jointLogMarginalLikelihood( - this->weightStyles(), this->samples(), this->weights(), samplesLogLikelihood); + this->samples(), this->weights(), samplesLogLikelihood); if (status != maths_t::E_FpNoErrors) { LOG_ERROR(<< "Failed evaluating log-likelihood at " << offset << " for " << core::CContainerPrinter::print(this->samples()) << " and weights " @@ -449,7 +435,7 @@ CPrior::CApplyOffset::CApplyOffset(CPrior& prior) : COffsetParameters(prior) { void CPrior::CApplyOffset::operator()(double offset) const { this->prior().setToNonInformative(offset, this->prior().decayRate()); - 
this->prior().addSamples(TWeights::COUNT, this->resamples(), this->resamplesWeights()); + this->prior().addSamples(this->resamples(), this->resamplesWeights()); } } } diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 498d701ebd..36bcc79292 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -208,8 +208,7 @@ bool CTimeSeriesDecomposition::initialized() const { bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights) { + const maths_t::TDoubleWeightsAry& weights) { CComponents::CScopeNotifyOnStateChange result{m_Components}; core_t::TTime lastTime{std::max(m_LastValueTime, m_LastPropagationTime)}; @@ -220,7 +219,6 @@ bool CTimeSeriesDecomposition::addPoint(core_t::TTime time, SAddValue message{time, lastTime, value, - weightStyles, weights, CBasicStatistics::mean(this->baseline(time, 0.0, E_TrendForced)), CBasicStatistics::mean(this->baseline(time, 0.0, E_Seasonal)), diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index ed72c5b34a..1133d1a497 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -369,20 +370,18 @@ CTimeSeriesDecompositionDetail::SMessage::SMessage(core_t::TTime time, core_t::T //////// SAddValue //////// -CTimeSeriesDecompositionDetail::SAddValue::SAddValue( - core_t::TTime time, - core_t::TTime lastTime, - double value, - const maths_t::TWeightStyleVec& weightStyles, - const maths_t::TDouble4Vec& weights, - double trend, - double seasonal, - double calendar, - const TPredictor& predictor, - const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) - : SMessage{time, lastTime}, s_Value{value}, s_WeightStyles{weightStyles}, - s_Weights{weights}, s_Trend{trend}, s_Seasonal{seasonal}, s_Calendar{calendar}, - s_Predictor{predictor}, s_PeriodicityTestConfig{periodicityTestConfig} { +CTimeSeriesDecompositionDetail::SAddValue::SAddValue(core_t::TTime time, + core_t::TTime lastTime, + double value, + const maths_t::TDoubleWeightsAry& weights, + double trend, + double seasonal, + double calendar, + const TPredictor& predictor, + const CPeriodicityHypothesisTestsConfig& periodicityTestConfig) + : SMessage{time, lastTime}, s_Value{value}, s_Weights{weights}, s_Trend{trend}, + s_Seasonal{seasonal}, s_Calendar{calendar}, s_Predictor{predictor}, + s_PeriodicityTestConfig{periodicityTestConfig} { } //////// SDetectedSeasonal //////// @@ -537,9 +536,8 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::swap(CPeriodicityTest& ot void CTimeSeriesDecompositionDetail::CPeriodicityTest::handle(const SAddValue& message) { core_t::TTime time{message.s_Time}; double value{message.s_Value}; - const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec& weights{message.s_Weights}; - double weight{maths_t::countForUpdate(weightStyles, weights)}; + const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; + double weight{maths_t::countForUpdate(weights)}; this->test(message); @@ -686,6 +684,7 @@ void CTimeSeriesDecompositionDetail::CPeriodicityTest::apply(std::size_t symbol, bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandingWindowPtr& window, core_t::TTime time) const { + // We need to test more frequently than when we compress, because // 
this only happens after we've seen 336 buckets, this would thus // significantly delay when we first detect a daily periodic for @@ -704,6 +703,7 @@ bool CTimeSeriesDecompositionDetail::CPeriodicityTest::shouldTest(const TExpandi } CExpandingWindow* CTimeSeriesDecompositionDetail::CPeriodicityTest::newWindow(ETest test) const { + using TTimeCRng = CExpandingWindow::TTimeCRng; auto newWindow = [this](const TTimeVec& bucketLengths) { @@ -790,14 +790,13 @@ void CTimeSeriesDecompositionDetail::CCalendarTest::handle(const SAddValue& mess core_t::TTime time{message.s_Time}; double error{message.s_Value - message.s_Trend - message.s_Seasonal - message.s_Calendar}; - const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec& weights{message.s_Weights}; + const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; this->test(message); switch (m_Machine.state()) { case CC_TEST: - m_Test->add(time, error, maths_t::countForUpdate(weightStyles, weights)); + m_Test->add(time, error, maths_t::countForUpdate(weights)); break; case CC_NOT_TESTING: break; @@ -1019,6 +1018,7 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser(core::C void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter( core::CStatePersistInserter& inserter) const { + inserter.insertValue(VERSION_6_3_TAG, ""); inserter.insertLevel( COMPONENTS_MACHINE_6_3_TAG, @@ -1066,8 +1066,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag double trend{message.s_Trend}; double seasonal{message.s_Seasonal}; double calendar{message.s_Calendar}; - const maths_t::TWeightStyleVec& weightStyles{message.s_WeightStyles}; - const maths_t::TDouble4Vec& weights{message.s_Weights}; + const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; TSeasonalComponentPtrVec seasonalComponents; TCalendarComponentPtrVec calendarComponents; @@ -1083,7 +1082,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag m_Calendar->componentsAndErrors(time, calendarComponents, calendarErrors); } - double weight{maths_t::countForUpdate(weightStyles, weights)}; + double weight{maths_t::countForUpdate(weights)}; std::size_t m{seasonalComponents.size()}; std::size_t n{calendarComponents.size()}; @@ -1842,10 +1841,12 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime shifted.reserve(s_Components.size()); for (auto& component : s_Components) { const CSeasonalTime& time_ = component.time(); - if (std::find_if(shifted.begin(), shifted.end(), [&time_](const TTimeTimePr& window) { - return !(time_.windowEnd() <= window.first || - time_.windowStart() >= window.second); - }) == shifted.end()) { + auto containsWindow = [&time_](const TTimeTimePr& window) { + return !(time_.windowEnd() <= window.first || + time_.windowStart() >= window.second); + }; + if (std::find_if(shifted.begin(), shifted.end(), + containsWindow) == shifted.end()) { component.shiftLevel(shift.second); } } diff --git a/lib/maths/CTimeSeriesDecompositionStub.cc b/lib/maths/CTimeSeriesDecompositionStub.cc index b1859143cb..7dd8116bc6 100644 --- a/lib/maths/CTimeSeriesDecompositionStub.cc +++ b/lib/maths/CTimeSeriesDecompositionStub.cc @@ -40,8 +40,7 @@ bool CTimeSeriesDecompositionStub::initialized() const { bool CTimeSeriesDecompositionStub::addPoint(core_t::TTime /*time*/, double /*value*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const maths_t::TDouble4Vec& /*weights*/) { + const maths_t::TDoubleWeightsAry& /*weights*/) { return 
false; } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 1901d19515..8e3b6356c9 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -47,8 +47,8 @@ using TDouble2Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; using TDouble10Vec1Vec = core::CSmallVector; using TDouble10Vec2Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; +using TDouble10VecWeightsAry = maths_t::TDouble10VecWeightsAry; +using TDouble10VecWeightsAry1Vec = maths_t::TDouble10VecWeightsAry1Vec; using TSizeVec = std::vector; using TSize10Vec = core::CSmallVector; using TSizeDoublePr = std::pair; @@ -56,12 +56,14 @@ using TSizeDoublePr10Vec = core::CSmallVector; using TTail10Vec = core::CSmallVector; using TTime1Vec = CTimeSeriesCorrelations::TTime1Vec; using TDouble1Vec = CTimeSeriesCorrelations::TDouble1Vec; -using TDouble4Vec = CTimeSeriesCorrelations::TDouble4Vec; -using TDouble4Vec1Vec = CTimeSeriesCorrelations::TDouble4Vec1Vec; +using TDoubleWeightsAry = maths_t::TDoubleWeightsAry; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TSize1Vec = CTimeSeriesCorrelations::TSize1Vec; using TSize2Vec1Vec = CTimeSeriesCorrelations::TSize2Vec1Vec; using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec; +const std::size_t SLIDING_WINDOW_SIZE{12}; + //! Computes the Winsorisation weight for \p value. double computeWinsorisationWeight(const CPrior& prior, double derate, double scale, double value) { static const double WINSORISED_FRACTION = 1e-4; @@ -73,15 +75,14 @@ double computeWinsorisationWeight(const CPrior& prior, double derate, double sca double deratedMinimumWeight = MINIMUM_WEIGHT + (0.5 - MINIMUM_WEIGHT) * CTools::truncate(derate, 0.0, 1.0); + auto weight = maths_t::seasonalVarianceScaleWeight(scale); double lowerBound; double upperBound; - if (!prior.minusLogJointCdf(CConstantWeights::SEASONAL_VARIANCE, {value}, - {{scale}}, lowerBound, upperBound)) { + if (!prior.minusLogJointCdf({value}, {weight}, lowerBound, upperBound)) { return 1.0; } if (upperBound < MINUS_LOG_TOLERANCE && - !prior.minusLogJointCdfComplement(CConstantWeights::SEASONAL_VARIANCE, {value}, - {{scale}}, lowerBound, upperBound)) { + !prior.minusLogJointCdfComplement({value}, {weight}, lowerBound, upperBound)) { return 1.0; } @@ -145,6 +146,12 @@ double computeWinsorisationWeight(const CMultivariatePrior& prior, return computeWinsorisationWeight(*conditional, derate, scale, value[dimension]); } +//! Get the count weight to use to initialise the residual model +//! from the sliding window. +double slidingWindowCountWeight(double learnRate) { + return std::max(learnRate, 5.0 / static_cast(SLIDING_WINDOW_SIZE)); +} + //! The decay rate controllers we maintain. enum EDecayRateController { E_TrendControl = 0, @@ -196,7 +203,6 @@ const std::string CORRELATION_TAG{"d"}; const std::size_t MAXIMUM_CORRELATIONS{5000}; const double MINIMUM_CORRELATE_PRIOR_SAMPLE_COUNT{24.0}; -const std::size_t SLIDING_WINDOW_SIZE{12}; const TSize10Vec NOTHING_TO_MARGINALIZE; const TSizeDoublePr10Vec NOTHING_TO_CONDITION; @@ -351,15 +357,14 @@ class CTimeSeriesAnomalyModel { //! significantly anomalous. static const double LOG_SMALL_PROBABILITY; //! A unit weight. - static const TDouble10Vec4Vec1Vec UNIT; + static const TDouble10VecWeightsAry1Vec UNIT; private: //! Update the appropriate anomaly model with \p anomaly. 
void sample(core_t::TTime time, const CAnomaly& anomaly, double weight) { std::size_t index(anomaly.positive() ? 0 : 1); TDouble10Vec1Vec features{anomaly.features(this->scale(time))}; - m_Priors[index].addSamples(CConstantWeights::COUNT, features, - {{TDouble10Vec(2, weight)}}); + m_Priors[index].addSamples(features, {maths_t::countWeight(weight, 2)}); } //! Get the scaled time. @@ -464,8 +469,7 @@ void CTimeSeriesAnomalyModel::probability(const CModelProbabilityParams& params, if (probability < LARGEST_ANOMALOUS_PROBABILITY && !m_Priors[index].isNonInformative() && m_Priors[index].probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, CConstantWeights::COUNT, features, - UNIT, pl, pu, tail)) { + maths_t::E_OneSidedAbove, features, UNIT, pl, pu, tail)) { double logp{CTools::fastLog(probability)}; double alpha{0.5 * std::min((logp - LOG_LARGEST_ANOMALOUS_PROBABILITY) / (LOG_SMALL_PROBABILITY - LOG_LARGEST_ANOMALOUS_PROBABILITY), @@ -535,8 +539,8 @@ const double CTimeSeriesAnomalyModel::LARGEST_ANOMALOUS_PROBABILITY{0.1}; const double CTimeSeriesAnomalyModel::LOG_LARGEST_ANOMALOUS_PROBABILITY{ CTools::fastLog(LARGEST_ANOMALOUS_PROBABILITY)}; const double CTimeSeriesAnomalyModel::LOG_SMALL_PROBABILITY{CTools::fastLog(SMALL_PROBABILITY)}; -const TDouble10Vec4Vec1Vec CTimeSeriesAnomalyModel::UNIT{ - CConstantWeights::unit(2)}; +const TDouble10VecWeightsAry1Vec CTimeSeriesAnomalyModel::UNIT{ + maths_t::CUnitWeights::unit(2)}; CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CModelParams& params, std::size_t id, @@ -617,9 +621,8 @@ TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates() const { void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec& values) { for (const auto& value : values) { - m_Prior->adjustOffset(CConstantWeights::COUNT, - {m_Trend->detrend(value.first, value.second[0], 0.0)}, - CConstantWeights::SINGLE_UNIT); + m_Prior->adjustOffset({m_Trend->detrend(value.first, value.second[0], 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } @@ -651,8 +654,7 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, m_IsNonNegative = params.isNonNegative(); - EUpdateResult result{this->updateTrend(params.weightStyles(), samples, - params.trendWeights())}; + EUpdateResult result{this->updateTrend(samples, params.trendWeights())}; for (auto& sample : samples) { sample.second[0] = m_Trend->detrend(sample.first, sample.second[0], 0.0); @@ -667,23 +669,22 @@ CUnivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, m_Prior->dataType(type); TDouble1Vec samples_; - TDouble4Vec1Vec weights; + TDoubleWeightsAry1Vec weights; samples_.reserve(samples.size()); weights.reserve(samples.size()); TMeanAccumulator averageTime; for (auto i : valueorder) { samples_.push_back(samples[i].second[0]); - TDouble4Vec1Vec wi(1); - wi[0].reserve(params.priorWeights()[i].size()); - for (const auto& weight : params.priorWeights()[i]) { - wi[0].push_back(weight[0]); + TDoubleWeightsAry1Vec wi(1); + for (std::size_t j = 0u; j < maths_t::NUMBER_WEIGHT_STYLES; ++j) { + wi[0][j] = params.priorWeights()[i][j][0]; } weights.push_back(wi[0]); averageTime.add(static_cast(samples[i].first)); } - m_Prior->addSamples(params.weightStyles(), samples_, weights); + m_Prior->addSamples(samples_, weights); m_Prior->propagateForwardsByTime(params.propagationInterval()); if (m_AnomalyModel) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); @@ -741,21 +742,19 @@ void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) { 
CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { - TDouble4Vec weights; - weights.reserve(weights_.size()); - for (const auto& weight : weights_) { - weights.push_back(weight[0]); - } - return {m_Prior->marginalLikelihoodMode(weightStyles, weights) + + const TDouble2VecWeightsAry& weights_) const { + TDoubleWeightsAry weights; + for (std::size_t i = 0u; i < weights_.size(); ++i) { + weights[i] = weights_[i][0]; + } + return {m_Prior->marginalLikelihoodMode(weights) + CBasicStatistics::mean(m_Trend->baseline(time))}; } CUnivariateTimeSeriesModel::TDouble2Vec1Vec CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec1Vec& weights_) const { + const TDouble2VecWeightsAry1Vec& weights_) const { + TDouble2Vec1Vec result; TSize1Vec correlated; @@ -771,15 +770,12 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, for (std::size_t i = 0u; i < correlated.size(); ++i) { baseline[1] = CBasicStatistics::mean( correlatedTimeSeriesModels[i]->m_Trend->baseline(time)); - TDouble10Vec4Vec weights; - weights.resize(weights_[i].size(), TDouble10Vec(2)); + TDouble10VecWeightsAry weights; for (std::size_t j = 0u; j < weights_[i].size(); ++j) { - for (std::size_t d = 0u; d < 2; ++d) { - weights[j][d] = weights_[i][j][d]; - } + weights[j] = weights_[i][j]; } - TDouble10Vec mode(correlationDistributionModels[i].first->marginalLikelihoodMode( - weightStyles, weights)); + TDouble10Vec mode( + correlationDistributionModels[i].first->marginalLikelihoodMode(weights)); result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]]; result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]]; } @@ -789,17 +785,16 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, } CUnivariateTimeSeriesModel::TDouble2Vec1Vec -CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { +CUnivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights_) const { + TDouble2Vec1Vec result; - TDouble4Vec weights; - weights.reserve(weights_.size()); - for (const auto& weight : weights_) { - weights.push_back(weight[0]); + TDoubleWeightsAry weights; + for (std::size_t i = 0u; i < weights_.size(); ++i) { + weights[i] = weights_[i][0]; } - TDouble1Vec modes(m_Prior->marginalLikelihoodModes(weightStyles, weights)); + TDouble1Vec modes(m_Prior->marginalLikelihoodModes(weights)); result.reserve(modes.size()); for (auto mode : modes) { result.push_back({mode}); @@ -811,6 +806,7 @@ CUnivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weight void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, double confidenceInterval, TDouble2Vec1Vec& value) const { + if (value.empty()) { return; } @@ -842,6 +838,7 @@ CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& correlatedValue, TDouble2Vec hint) const { + double correlateCorrection{0.0}; if (!correlatedValue.empty()) { TSize1Vec correlated{correlatedValue[0].first}; @@ -888,8 +885,8 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, CUnivariateTimeSeriesModel::TDouble2Vec3Vec CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { + const 
TDouble2VecWeightsAry& weights_) const { + if (m_Prior->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -900,16 +897,15 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, ? CBasicStatistics::mean(m_Trend->baseline(time, confidenceInterval)) : 0.0}; - TDouble4Vec weights; - weights.reserve(weights_.size()); - for (const auto& weight : weights_) { - weights.push_back(weight[0]); + TDoubleWeightsAry weights; + for (std::size_t i = 0u; i < weights_.size(); ++i) { + weights[i] = weights_[i][0]; } double median{CBasicStatistics::mean( - m_Prior->marginalLikelihoodConfidenceInterval(0.0, weightStyles, weights))}; - TDoubleDoublePr interval{m_Prior->marginalLikelihoodConfidenceInterval( - confidenceInterval, weightStyles, weights)}; + m_Prior->marginalLikelihoodConfidenceInterval(0.0, weights))}; + TDoubleDoublePr interval{ + m_Prior->marginalLikelihoodConfidenceInterval(confidenceInterval, weights)}; double result[]{scale * (seasonalOffset + interval.first), scale * (seasonalOffset + median), @@ -967,6 +963,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para TTail2Vec& tail, bool& conditional, TSize1Vec& mostAnomalousCorrelate) const { + probability = 1.0; tail.resize(1, maths_t::E_UndeterminedTail); conditional = false; @@ -981,16 +978,14 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para TDouble1Vec sample{m_Trend->detrend(time, value[0][0], params.seasonalConfidenceInterval())}; - TDouble4Vec1Vec weights(1); - weights[0].reserve(params.weights()[0].size()); - for (const auto& weight : params.weights()[0]) { - weights[0].push_back(weight[0]); + TDoubleWeightsAry1Vec weights(1); + for (std::size_t i = 0u; i < params.weights()[0].size(); ++i) { + weights[0][i] = params.weights()[0][i][0]; } double pl, pu; maths_t::ETail tail_; - if (m_Prior->probabilityOfLessLikelySamples(params.calculation(0), - params.weightStyles(), sample, + if (m_Prior->probabilityOfLessLikelySamples(params.calculation(0), sample, weights, pl, pu, tail_)) { LOG_TRACE(<< "P(" << sample << " | weight = " << weights << ", time = " << time << ") = " << (pl + pu) / 2.0); @@ -1029,8 +1024,8 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para // Declared outside the loop to minimize the number of times they are created. TSize10Vec variable(1); TDouble10Vec1Vec sample{TDouble10Vec(2)}; - TDouble10Vec4Vec1Vec weights{ - TDouble10Vec4Vec(params.weightStyles().size(), TDouble10Vec(2))}; + TDouble10VecWeightsAry1Vec weights{ + maths_t::CUnitWeights::singleUnit(2)}; TDouble2Vec probabilityBucketEmpty(2); TDouble10Vec2Vec pli, pui; TTail10Vec ti; @@ -1056,8 +1051,7 @@ bool CUnivariateTimeSeriesModel::probability(const CModelProbabilityParams& para } if (correlationDistributionModels[i].first->probabilityOfLessLikelySamples( - params.calculation(0), params.weightStyles(), sample, - weights, variable, pli, pui, ti)) { + params.calculation(0), sample, weights, variable, pli, pui, ti)) { LOG_TRACE(<< "Marginal P(" << sample << " | weight = " << weights << ", coordinate = " << variable << ") = " << (pli[0][0] + pui[0][0]) / 2.0); @@ -1231,6 +1225,7 @@ bool CUnivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestoreParam } void CUnivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + // Note that we don't persist this->params() or the correlations // because that state is reinitialized. 
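Throughout this file the conversion follows the same pattern: every weight style is always present in the new representation, so the code copies slot i of the model's TDouble2VecWeightsAry into slot i of a maths_t array instead of consulting a separate style vector. Below is a minimal sketch of the calling convention the priors now expect, assuming the usual ml-cpp include paths; the prior reference, function name and sample values are illustrative only, not part of this patch.

    #include <maths/CPrior.h>
    #include <maths/MathsTypes.h>

    using namespace ml;

    void scoreDetrendedSample(const maths::CPrior& prior, double sample, double seasonalScale) {
        // One weights array per sample; any style left unset stays at 1.0.
        maths_t::TDoubleWeightsAry1Vec weights{maths_t::seasonalVarianceScaleWeight(seasonalScale)};

        double lowerBound;
        double upperBound;
        maths_t::ETail tail;
        // The weight-style vector argument is gone; the array index now identifies the style.
        if (prior.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {sample},
                                                 weights, lowerBound, upperBound, tail)) {
            // lowerBound and upperBound bracket the probability of a less likely sample.
        }
    }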
inserter.insertValue(VERSION_6_3_TAG, ""); @@ -1286,9 +1281,8 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri } CUnivariateTimeSeriesModel::EUpdateResult -CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& weights) { +CUnivariateTimeSeriesModel::updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& weights) { for (const auto& sample : samples) { if (sample.second.size() != 1) { LOG_ERROR(<< "Dimension mismatch: '" << sample.second.size() << " != 1'"); @@ -1309,25 +1303,25 @@ CUnivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightSt EUpdateResult result = E_Success; { - TDouble4Vec weight(weightStyles.size()); + TDoubleWeightsAry weight; for (auto i : timeorder) { core_t::TTime time{samples[i].first}; double value{samples[i].second[0]}; for (std::size_t j = 0u; j < weights[i].size(); ++j) { weight[j] = weights[i][j][0]; } - if (m_Trend->addPoint(time, value, weightStyles, weight)) { + if (m_Trend->addPoint(time, value, weight)) { result = E_Reset; } } } if (result == E_Reset) { m_Prior->setToNonInformative(0.0, m_Prior->decayRate()); - TDouble4Vec1Vec weight{{std::max(this->params().learnRate(), - 5.0 / static_cast(SLIDING_WINDOW_SIZE))}}; + TDoubleWeightsAry1Vec weight{maths_t::countWeight( + slidingWindowCountWeight(this->params().learnRate()))}; for (const auto& value : m_SlidingWindow) { TDouble1Vec sample{m_Trend->detrend(value.first, value.second, 0.0)}; - m_Prior->addSamples(CConstantWeights::COUNT, sample, weight); + m_Prior->addSamples(sample, weight); } if (m_Correlations) { m_Correlations->removeTimeSeries(m_Id); @@ -1403,11 +1397,9 @@ CTimeSeriesCorrelations* CTimeSeriesCorrelations::cloneForPersistence() const { return new CTimeSeriesCorrelations(*this, true); } -void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& weightStyles) { - using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItr = - TSizeSizePrMultivariatePriorPtrDoublePrUMap::const_iterator; +void CTimeSeriesCorrelations::processSamples() { using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = - std::vector; + std::vector; // The priors use a shared pseudo random number generator which // generates a fixed sequence of random numbers. 
Since the order @@ -1427,7 +1419,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei core::CFunctional::SDereference()); TDouble10Vec1Vec multivariateSamples; - TDouble10Vec4Vec1Vec multivariateWeights; + TDouble10VecWeightsAry1Vec multivariateWeights; for (auto i : iterators) { std::size_t pid1{i->first.first}; std::size_t pid2{i->first.second}; @@ -1449,8 +1441,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei std::swap(indices[0], indices[1]); } multivariateSamples.assign(n1, TDouble10Vec(2)); - multivariateWeights.assign( - n1, TDouble10Vec4Vec(weightStyles.size(), TDouble10Vec(2))); + multivariateWeights.assign(n1, maths_t::CUnitWeights::unit(2)); TSize1Vec& tags2{samples2->s_Tags}; TTime1Vec& times2{samples2->s_Times}; @@ -1486,7 +1477,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei } multivariateSamples[j1][indices[0]] = samples1->s_Samples[j1]; multivariateSamples[j1][indices[1]] = samples2->s_Samples[j2]; - for (std::size_t w = 0u; w < weightStyles.size(); ++w) { + for (std::size_t w = 0u; w < maths_t::NUMBER_WEIGHT_STYLES; ++w) { multivariateWeights[j1][w][indices[0]] = samples1->s_Weights[j1][w]; multivariateWeights[j1][w][indices[1]] = samples2->s_Weights[j2][w]; } @@ -1499,7 +1490,7 @@ void CTimeSeriesCorrelations::processSamples(const maths_t::TWeightStyleVec& wei samples2->s_Type == maths_t::E_IntegerData ? maths_t::E_IntegerData : maths_t::E_ContinuousData); - prior->addSamples(weightStyles, multivariateSamples, multivariateWeights); + prior->addSamples(multivariateSamples, multivariateWeights); prior->propagateForwardsByTime(std::min(samples1->s_Interval, samples2->s_Interval)); prior->decayRate(std::sqrt(samples1->s_Multiplier * samples2->s_Multiplier) * prior->decayRate()); @@ -1675,6 +1666,7 @@ bool CTimeSeriesCorrelations::restoreCorrelatePriors(const SDistributionRestoreP } void CTimeSeriesCorrelations::persistCorrelatePriors(core::CStatePersistInserter& inserter) const { + using TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec = std::vector; TSizeSizePrMultivariatePriorPtrDoublePrUMapCItrVec ordered; @@ -1742,7 +1734,7 @@ void CTimeSeriesCorrelations::removeTimeSeries(std::size_t id) { void CTimeSeriesCorrelations::addSamples(std::size_t id, maths_t::EDataType type, const TTimeDouble2VecSizeTrVec& samples, - const TDouble4Vec1Vec& weights, + const TDoubleWeightsAry1Vec& weights, double interval, double multiplier) { SSampleData& data{m_SampleData[id]}; @@ -1771,6 +1763,7 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, TSize2Vec1Vec& variables, TMultivariatePriorCPtrSizePr1Vec& correlationDistributionModels, TModelCPtr1Vec& correlatedTimeSeriesModels) const { + variables.clear(); correlationDistributionModels.clear(); correlatedTimeSeriesModels.clear(); @@ -1936,8 +1929,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, std::size_t dimension{this->dimension()}; - EUpdateResult result{this->updateTrend(params.weightStyles(), samples, - params.trendWeights())}; + EUpdateResult result{this->updateTrend(samples, params.trendWeights())}; for (auto& sample : samples) { if (sample.second.size() != dimension) { @@ -1960,15 +1952,15 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, m_Prior->dataType(type); TDouble10Vec1Vec samples_; - TDouble10Vec4Vec1Vec weights; + TDouble10VecWeightsAry1Vec weights; samples_.reserve(samples.size()); weights.reserve(samples.size()); TMeanAccumulator 
averageTime; for (auto i : valueorder) { samples_.push_back(samples[i].second); - TDouble10Vec4Vec wi(params.weightStyles().size(), TDouble10Vec(dimension)); - for (std::size_t j = 0u; j < params.priorWeights()[i].size(); ++j) { + TDouble10VecWeightsAry wi(maths_t::CUnitWeights::unit(dimension)); + for (std::size_t j = 0u; j < maths_t::NUMBER_WEIGHT_STYLES; ++j) { const TDouble2Vec& weight{params.priorWeights()[i][j]}; for (std::size_t d = 0u; d < dimension; ++d) { wi[j][d] = weight[d]; @@ -1978,7 +1970,7 @@ CMultivariateTimeSeriesModel::addSamples(const CModelAddSamplesParams& params, averageTime.add(static_cast(samples[i].first)); } - m_Prior->addSamples(params.weightStyles(), samples_, weights); + m_Prior->addSamples(samples_, weights); m_Prior->propagateForwardsByTime(params.propagationInterval()); if (m_AnomalyModel) { m_AnomalyModel->propagateForwardsByTime(params.propagationInterval()); @@ -2037,20 +2029,18 @@ void CMultivariateTimeSeriesModel::skipTime(core_t::TTime gap) { CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::mode(core_t::TTime time, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { + const TDouble2VecWeightsAry& weights_) const { + std::size_t dimension = this->dimension(); TDouble2Vec result(dimension); - TDouble10Vec4Vec weights(weights_.size()); + TDouble10VecWeightsAry weights; for (std::size_t i = 0u; i < weights_.size(); ++i) { - for (std::size_t d = 0u; d < dimension; ++d) { - weights[i].push_back(weights_[i][d]); - } + weights[i] = weights_[i]; } - TDouble10Vec mode(m_Prior->marginalLikelihoodMode(weightStyles, weights)); + TDouble10Vec mode(m_Prior->marginalLikelihoodMode(weights)); for (std::size_t d = 0u; d < dimension; ++d) { result[d] = mode[d] + CBasicStatistics::mean(m_Trend[d]->baseline(time)); @@ -2061,20 +2051,18 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, - const maths_t::TWeightStyleVec& /*weightStyles*/, - const TDouble2Vec4Vec1Vec& /*weights*/) const { + const TDouble2VecWeightsAry1Vec& /*weights*/) const { return TDouble2Vec1Vec(); } CMultivariateTimeSeriesModel::TDouble2Vec1Vec -CMultivariateTimeSeriesModel::residualModes(const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { - TDouble10Vec4Vec weights; - weights.reserve(weights_.size()); - for (const auto& weight : weights_) { - weights.emplace_back(weight[0]); - } - TDouble10Vec1Vec modes(m_Prior->marginalLikelihoodModes(weightStyles, weights)); +CMultivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights_) const { + + TDouble10VecWeightsAry weights; + for (std::size_t i = 0u; i < weights_.size(); ++i) { + weights[i] = weights_[i]; + } + TDouble10Vec1Vec modes(m_Prior->marginalLikelihoodModes(weights)); TDouble2Vec1Vec result; result.reserve(modes.size()); for (const auto& mode : modes) { @@ -2142,8 +2130,8 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, CMultivariateTimeSeriesModel::TDouble2Vec3Vec CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double confidenceInterval, - const maths_t::TWeightStyleVec& weightStyles, - const TDouble2Vec4Vec& weights_) const { + const TDouble2VecWeightsAry& weights_) const { + if (m_Prior->isNonInformative()) { return TDouble2Vec3Vec(); } @@ -2160,24 +2148,22 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, TDouble2Vec3Vec result(3, TDouble2Vec(dimension)); - 
TDouble4Vec weights; + TDoubleWeightsAry weights; for (std::size_t d = 0u; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { double seasonalOffset{m_Trend[d]->initialized() ? CBasicStatistics::mean(m_Trend[d]->baseline(time, confidenceInterval)) : 0.0}; - weights.clear(); - weights.reserve(weights_.size()); - for (const auto& weight : weights_) { - weights.push_back(weight[d]); + for (std::size_t i = 0u; i < maths_t::NUMBER_WEIGHT_STYLES; ++i) { + weights[i] = weights_[i][d]; } TUnivariatePriorPtr marginal{m_Prior->univariate(marginalize, CONDITION).first}; double median{CBasicStatistics::mean( marginal->marginalLikelihoodConfidenceInterval(0.0))}; TDoubleDoublePr interval{marginal->marginalLikelihoodConfidenceInterval( - confidenceInterval, weightStyles, weights)}; + confidenceInterval, weights)}; result[0][d] = scale * (seasonalOffset + interval.first); result[1][d] = scale * (seasonalOffset + median); @@ -2211,6 +2197,7 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa TTail2Vec& tail, bool& conditional, TSize1Vec& mostAnomalousCorrelate) const { + TSize2Vec coordinates(params.coordinates()); if (coordinates.empty()) { coordinates.resize(this->dimension()); @@ -2225,13 +2212,12 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa std::size_t dimension{this->dimension()}; core_t::TTime time{time_[0][0]}; TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; - TDouble10Vec4Vec1Vec weights{ - TDouble10Vec4Vec(params.weightStyles().size(), TDouble10Vec(dimension))}; + TDouble10VecWeightsAry1Vec weights{maths_t::CUnitWeights::unit(dimension)}; for (std::size_t d = 0u; d < dimension; ++d) { sample[0][d] = m_Trend[d]->detrend(time, value[0][d], params.seasonalConfidenceInterval()); } - for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) { + for (std::size_t i = 0u; i < maths_t::NUMBER_WEIGHT_STYLES; ++i) { for (std::size_t d = 0u; d < dimension; ++d) { weights[0][i][d] = params.weights()[0][i][d]; } @@ -2249,9 +2235,8 @@ bool CMultivariateTimeSeriesModel::probability(const CModelProbabilityParams& pa for (std::size_t i = 0u; i < coordinates.size(); ++i) { maths_t::EProbabilityCalculation calculation = params.calculation(i); coordinate[0] = coordinates[i]; - if (!m_Prior->probabilityOfLessLikelySamples(calculation, params.weightStyles(), - sample, weights, coordinate, - pls, pus, tail_)) { + if (!m_Prior->probabilityOfLessLikelySamples(calculation, sample, weights, + coordinate, pls, pus, tail_)) { LOG_ERROR(<< "Failed to compute P(" << sample << " | weight = " << weights << ")"); return false; } @@ -2292,6 +2277,7 @@ CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::winsorisationWeight(double derate, core_t::TTime time, const TDouble2Vec& value) const { + TDouble2Vec result(this->dimension()); std::size_t dimension{this->dimension()}; @@ -2409,6 +2395,7 @@ bool CMultivariateTimeSeriesModel::acceptRestoreTraverser(const SModelRestorePar } void CMultivariateTimeSeriesModel::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + // Note that we don't persist this->params() because that state // is reinitialized. 
inserter.insertValue(VERSION_6_3_TAG, ""); @@ -2450,9 +2437,8 @@ const CMultivariatePrior& CMultivariateTimeSeriesModel::prior() const { } CMultivariateTimeSeriesModel::EUpdateResult -CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weightStyles, - const TTimeDouble2VecSizeTrVec& samples, - const TDouble2Vec4VecVec& weights) { +CMultivariateTimeSeriesModel::updateTrend(const TTimeDouble2VecSizeTrVec& samples, + const TDouble2VecWeightsAryVec& weights) { std::size_t dimension{this->dimension()}; for (const auto& sample : samples) { @@ -2476,15 +2462,15 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weight EUpdateResult result{E_Success}; { - TDouble4Vec weight(weightStyles.size()); + TDoubleWeightsAry weight; for (auto i : timeorder) { core_t::TTime time{samples[i].first}; TDouble10Vec value(samples[i].second); for (std::size_t d = 0u; d < dimension; ++d) { - for (std::size_t j = 0u; j < weights[i].size(); ++j) { + for (std::size_t j = 0u; j < maths_t::NUMBER_WEIGHT_STYLES; ++j) { weight[j] = weights[i][j][d]; } - if (m_Trend[d]->addPoint(time, value[d], weightStyles, weight)) { + if (m_Trend[d]->addPoint(time, value[d], weight)) { result = E_Reset; } } @@ -2492,15 +2478,14 @@ CMultivariateTimeSeriesModel::updateTrend(const maths_t::TWeightStyleVec& weight } if (result == E_Reset) { m_Prior->setToNonInformative(0.0, m_Prior->decayRate()); - TDouble10Vec4Vec1Vec weight{{TDouble10Vec( - dimension, std::max(this->params().learnRate(), - 5.0 / static_cast(SLIDING_WINDOW_SIZE)))}}; + TDouble10VecWeightsAry1Vec weight{maths_t::countWeight( + slidingWindowCountWeight(this->params().learnRate()), dimension)}; for (const auto& value : m_SlidingWindow) { TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; for (std::size_t i = 0u; i < dimension; ++i) { sample[0][i] = m_Trend[i]->detrend(value.first, value.second[i], 0.0); } - m_Prior->addSamples(CConstantWeights::COUNT, sample, weight); + m_Prior->addSamples(sample, weight); } if (m_Controllers) { m_Prior->decayRate(m_Prior->decayRate() / diff --git a/lib/maths/CTools.cc b/lib/maths/CTools.cc index c837590b2c..898e6158d4 100644 --- a/lib/maths/CTools.cc +++ b/lib/maths/CTools.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/lib/maths/CXMeansOnline1d.cc b/lib/maths/CXMeansOnline1d.cc index d11d5d9385..8768aa7a20 100644 --- a/lib/maths/CXMeansOnline1d.cc +++ b/lib/maths/CXMeansOnline1d.cc @@ -55,8 +55,6 @@ namespace maths { namespace { using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TDoubleDoublePr = std::pair; using TSizeVec = std::vector; using TTuple = CNaturalBreaksClassifier::TTuple; @@ -100,7 +98,7 @@ maths_t::EFloatingPointErrorStatus logLikelihoodFromCluster(double point, double likelihood; maths_t::EFloatingPointErrorStatus status = normal.jointLogMarginalLikelihood( - CConstantWeights::COUNT, {point}, CConstantWeights::SINGLE_UNIT, likelihood); + {point}, maths_t::CUnitWeights::SINGLE_UNIT, likelihood); if (status & maths_t::E_FpFailed) { LOG_ERROR(<< "Unable to compute likelihood for: " << point); return status; @@ -198,6 +196,7 @@ void BICGain(maths_t::EDataType dataType, double& distance, double& nl, double& nr) { + // The basic idea is to compute the difference between the // Bayes Information Content (BIC) for one and two clusters // for the sketch defined by the categories passed to this @@ -416,6 +415,7 @@ void BICGain(maths_t::EDataType dataType, //! 
\param[in] interval The Winsorisation interval. //! \param[in,out] category The category to Winsorise. void winsorise(const TDoubleDoublePr& interval, TTuple& category) { + double a = interval.first; double b = interval.second; double m = CBasicStatistics::mean(category); @@ -486,6 +486,7 @@ bool splitSearch(double minimumCount, double smallest, const TTupleVec& categories, TSizeVec& result) { + using TSizeSizePr = std::pair; LOG_TRACE(<< "begin split search"); @@ -548,11 +549,11 @@ bool splitSearch(double minimumCount, if (!satisfiesCount) { // Recurse to the (one) node with sufficient count. if (nl > minimumCount && candidate[0] - node.first > 1) { - node = std::make_pair(node.first, candidate[0]); + node = {node.first, candidate[0]}; continue; } if (nr > minimumCount && node.second - candidate[0] > 1) { - node = std::make_pair(candidate[0], node.second); + node = {candidate[0], node.second}; continue; } } else if (satisfiesDistance) { @@ -803,6 +804,7 @@ bool CXMeansOnline1d::clusterSpread(std::size_t index, double& result) const { } void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, double count) const { + result.clear(); if (m_Clusters.empty()) { @@ -810,8 +812,8 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do return; } - TClusterVecCItr rightCluster = std::lower_bound( - m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); + auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), + point, detail::SClusterCentreLess()); if (rightCluster == m_Clusters.end()) { --rightCluster; @@ -839,7 +841,7 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do // also that we do not want to soft assign the point to a // cluster if its probability is close to zero. 
- TClusterVecCItr leftCluster = rightCluster; + auto leftCluster = rightCluster; --leftCluster; double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); double likelihoodRight = rightCluster->logLikelihoodFromCluster(m_WeightCalc, point); @@ -863,14 +865,15 @@ void CXMeansOnline1d::cluster(const double& point, TSizeDoublePr2Vec& result, do } void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, double count) { + m_HistoryLength += 1.0; m_Smallest.add(point); m_Largest.add(point); clusters.clear(); - TClusterVecItr rightCluster = std::lower_bound( - m_Clusters.begin(), m_Clusters.end(), point, detail::SClusterCentreLess()); + auto rightCluster = std::lower_bound(m_Clusters.begin(), m_Clusters.end(), + point, detail::SClusterCentreLess()); if (rightCluster == m_Clusters.end()) { --rightCluster; @@ -880,7 +883,7 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub if (this->maybeSplit(rightCluster)) { this->cluster(point, clusters, count); } else if (rightCluster != m_Clusters.begin()) { - TClusterVecItr leftCluster = rightCluster; + auto leftCluster = rightCluster; --leftCluster; if (this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); @@ -893,7 +896,7 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub if (this->maybeSplit(rightCluster)) { this->cluster(point, clusters, count); } else { - TClusterVecItr leftCluster = rightCluster; + auto leftCluster = rightCluster; ++rightCluster; if (this->maybeMerge(leftCluster, rightCluster)) { this->cluster(point, clusters, count); @@ -902,7 +905,7 @@ void CXMeansOnline1d::add(const double& point, TSizeDoublePr2Vec& clusters, doub } else { // See the cluster member function for more details on // soft assignment. 
- TClusterVecItr leftCluster = rightCluster; + auto leftCluster = rightCluster; --leftCluster; double likelihoodLeft = leftCluster->logLikelihoodFromCluster(m_WeightCalc, point); double likelihoodRight = rightCluster->logLikelihoodFromCluster(m_WeightCalc, point); @@ -1034,6 +1037,7 @@ const CXMeansOnline1d::TClusterVec& CXMeansOnline1d::clusters() const { } std::string CXMeansOnline1d::printClusters() const { + if (m_Clusters.empty()) { return std::string(); } @@ -1061,10 +1065,7 @@ std::string CXMeansOnline1d::printClusters() const { weightSum += m_Clusters[i].weight(m_WeightCalc); } - static const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); - static const TDouble4Vec1Vec UNIT_WEIGHT(1, TDouble4Vec(1, 1.0)); - - TDouble1Vec x(1, range.first); + TDouble1Vec x{range.first}; double increment = (range.second - range.first) / (POINTS - 1.0); std::ostringstream coordinatesStr; @@ -1076,7 +1077,7 @@ std::string CXMeansOnline1d::printClusters() const { for (std::size_t j = 0u; j < m_Clusters.size(); ++j) { double logLikelihood; const CPrior& prior = m_Clusters[j].prior(); - if (!(prior.jointLogMarginalLikelihood(COUNT_WEIGHT, x, UNIT_WEIGHT, logLikelihood) & + if (!(prior.jointLogMarginalLikelihood(x, maths_t::CUnitWeights::SINGLE_UNIT, logLikelihood) & (maths_t::E_FpFailed | maths_t::E_FpOverflowed))) { likelihood += m_Clusters[j].weight(m_WeightCalc) / weightSum * std::exp(logLikelihood); @@ -1153,6 +1154,7 @@ double CXMeansOnline1d::minimumSplitCount() const { } bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { + if (cluster == m_Clusters.end()) { return false; } @@ -1174,6 +1176,7 @@ bool CXMeansOnline1d::maybeSplit(TClusterVecItr cluster) { } bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster2) { + if (cluster1 == m_Clusters.end() || cluster2 == m_Clusters.end()) { return false; } @@ -1196,6 +1199,7 @@ bool CXMeansOnline1d::maybeMerge(TClusterVecItr cluster1, TClusterVecItr cluster } bool CXMeansOnline1d::prune() { + if (m_Clusters.size() <= 1) { return false; } @@ -1225,13 +1229,14 @@ bool CXMeansOnline1d::prune() { } TDoubleDoublePr CXMeansOnline1d::winsorisationInterval() const { + double f = (1.0 - m_WinsorisationConfidenceInterval) / 2.0; if (f * this->count() < 1.0) { // Don't bother if we don't expect a sample outside the // Winsorisation interval. - return std::make_pair(boost::numeric::bounds::lowest() / 2.0, - boost::numeric::bounds::highest() / 2.0); + return {boost::numeric::bounds::lowest() / 2.0, + boost::numeric::bounds::highest() / 2.0}; } // The Winsorisation interval are the positions corresponding @@ -1314,8 +1319,7 @@ void CXMeansOnline1d::CCluster::dataType(maths_t::EDataType dataType) { } void CXMeansOnline1d::CCluster::add(double point, double count) { - m_Prior.addSamples(CConstantWeights::COUNT, TDouble1Vec(1, point), - TDouble4Vec1Vec(1, TDouble4Vec(1, count))); + m_Prior.addSamples({point}, {maths_t::countWeight(count)}); m_Structure.add(point, count); } @@ -1382,6 +1386,7 @@ CXMeansOnline1d::CCluster::split(CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval, CIndexGenerator& indexGenerator) { + // We do our clustering top down to minimize space and avoid // making splits before we are confident they exist. 
This is // important for anomaly detection because we do *not* want @@ -1446,6 +1451,7 @@ bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, CAvailableModeDistributions distributions, double smallest, const TDoubleDoublePr& interval) { + if (m_Structure.buffering() || m_Structure.size() == 0 || other.m_Structure.size() == 0) { return false; @@ -1478,6 +1484,7 @@ bool CXMeansOnline1d::CCluster::shouldMerge(CCluster& other, CXMeansOnline1d::CCluster CXMeansOnline1d::CCluster::merge(CCluster& other, CIndexGenerator& indexGenerator) { + TTupleVec left, right; m_Structure.categories(1, 0, left); other.m_Structure.categories(1, 0, right); diff --git a/lib/maths/Constants.cc b/lib/maths/Constants.cc index 4961461cd0..c3a062a41d 100644 --- a/lib/maths/Constants.cc +++ b/lib/maths/Constants.cc @@ -18,14 +18,6 @@ namespace ml { namespace maths { -const maths_t::TWeightStyleVec CConstantWeights::COUNT{maths_t::E_SampleCountWeight}; -const maths_t::TWeightStyleVec CConstantWeights::COUNT_VARIANCE{ - maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec CConstantWeights::SEASONAL_VARIANCE{ - maths_t::E_SampleSeasonalVarianceScaleWeight}; -const CConstantWeights::TDouble4Vec CConstantWeights::UNIT{1.0}; -const CConstantWeights::TDouble4Vec1Vec CConstantWeights::SINGLE_UNIT{UNIT}; - double maxModelPenalty(double numberSamples) { return 10.0 + numberSamples; } diff --git a/lib/maths/MathsTypes.cc b/lib/maths/MathsTypes.cc index 400a375824..a6d2a097cd 100644 --- a/lib/maths/MathsTypes.cc +++ b/lib/maths/MathsTypes.cc @@ -28,320 +28,120 @@ namespace ml { namespace maths_t { namespace { -namespace detail { - -//! Check that the weights styles and weights are consistent. -template -inline bool check(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights) { - if (weightStyles.size() == weights.size()) { - return true; - } - LOG_ERROR(<< "Mismatch in weight styles '" << core::CContainerPrinter::print(weightStyles) - << "' and weights '" << core::CContainerPrinter::print(weights) << "'"); - return false; +TDoubleWeightsAry unitWeight() { + TDoubleWeightsAry result; + result.assign(1.0); + return result; } - -//! Multiply \p lhs by \p rhs. -inline void multiplyEquals(double rhs, double& lhs) { - lhs *= rhs; } -//! Elementwise multiply \p lhs by \p rhs. -inline void multiplyEquals(const TDouble10Vec& rhs, TDouble10Vec& lhs) { - for (std::size_t i = 0u; i < lhs.size(); ++i) { - lhs[i] *= rhs[i]; - } -} +const TDoubleWeightsAry CUnitWeights::UNIT(unitWeight()); +const TDoubleWeightsAry1Vec CUnitWeights::SINGLE_UNIT{unitWeight()}; -//! Extract the effective sample count from a collection of weights. -template -void count(const TWeightStyleVec& weightStyles, const core::CSmallVector& weights, T& result) { - if (check(weightStyles, weights)) { - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - multiplyEquals(weights[i], result); - return; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - break; - } - } - } +TDoubleWeightsAry countWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleCountWeight] = weight; + return result; } -//! Extract the effective sample count with which to update a model -//! from a collection of weights. 
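The replacement helpers above all start from a unit array and overwrite a single slot, so reading a weight back is an index lookup and combining weights is ordinary arithmetic. A small sketch of how the scalar and multivariate builders compose, using only functions defined in this file; the 2.0 and 0.5 values are arbitrary, and countForUpdate, defined just below, reduces to the product of the count and Winsorisation slots.

    using namespace ml;

    // Scalar case: count weight of 2.0, every other style stays at 1.0.
    maths_t::TDoubleWeightsAry weight(maths_t::countWeight(2.0));
    weight[maths_t::E_SampleWinsorisationWeight] = 0.5;
    double effectiveCount{maths_t::countForUpdate(weight)}; // 2.0 * 0.5 == 1.0

    // Multivariate case: one TDouble10Vec per slot, here for three dimensions.
    maths_t::TDouble10VecWeightsAry weight3(maths_t::countWeight(2.0, 3));
    maths_t::setWinsorisationWeight(0.5, 3, weight3);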
-template -void countForUpdate(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - multiplyEquals(weights[i], result); - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - multiplyEquals(weights[i], result); - break; - } - } - } +TDouble10VecWeightsAry countWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleCountWeight] = TDouble10Vec(dimension, weight); + return result; } -//! Extract the variance scale from a collection of weights. -template -void seasonalVarianceScale(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - multiplyEquals(weights[i], result); - return; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - break; - } - } - } +void setCount(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleCountWeight] = TDouble10Vec(dimension, weight); } -//! Extract the variance scale from a collection of weights. -template -void countVarianceScale(const TWeightStyleVec& weightStyles, - const core::CSmallVector& weights, - T& result) { - if (check(weightStyles, weights)) { - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - multiplyEquals(weights[i], result); - return; - case E_SampleWinsorisationWeight: - break; - } - } - } -} -} +double countForUpdate(const TDoubleWeightsAry& weights) { + return weights[E_SampleCountWeight] * weights[E_SampleWinsorisationWeight]; } -double count(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result = 1.0; - detail::count(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result < 0.0) { - throw std::runtime_error("Bad count weight " + - core::CStringUtils::typeToString(result)); +TDouble10Vec countForUpdate(const TDouble10VecWeightsAry& weights) { + TDouble10Vec result(weights[E_SampleCountWeight]); + for (std::size_t i = 0u; i < weights[E_SampleWinsorisationWeight].size(); ++i) { + result[i] *= weights[E_SampleWinsorisationWeight][i]; } return result; } -TDouble10Vec count(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::count(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] < 0.0) { - throw std::runtime_error("Bad count weight: [" + - core::CContainerPrinter::print(result) + "]"); - } - } +TDoubleWeightsAry winsorisationWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleWinsorisationWeight] = weight; return result; } -double countForUpdate(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result = 1.0; - detail::countForUpdate(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || 
result < 0.0) { - throw std::runtime_error("Bad count weight " + - core::CStringUtils::typeToString(result)); - } +TDouble10VecWeightsAry winsorisationWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleWinsorisationWeight] = TDouble10Vec(dimension, weight); return result; } -TDouble10Vec countForUpdate(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::countForUpdate(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] < 0.0) { - throw std::runtime_error("Bad count weight: [" + - core::CContainerPrinter::print(result) + "]"); - } - } - return result; +void setWinsorisationWeight(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleWinsorisationWeight] = TDouble10Vec(dimension, weight); } -double seasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result = 1.0; - detail::seasonalVarianceScale(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result <= 0.0) { - throw std::runtime_error("Bad variance scale " + - core::CStringUtils::typeToString(result)); - } - return result; +bool isWinsorised(const TDoubleWeightsAry& weights) { + return weights[E_SampleWinsorisationWeight] != 1.0; } -TDouble10Vec seasonalVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::seasonalVarianceScale(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] <= 0.0) { - throw std::runtime_error("Bad count weight: [" + - core::CContainerPrinter::print(result) + "]"); - } - } - return result; +bool isWinsorised(const TDoubleWeightsAry1Vec& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TDoubleWeightsAry& weight) { + return isWinsorised(weight); + }); } -double countVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - double result = 1.0; - detail::countVarianceScale(weightStyles, weights, result); - if (!maths::CMathsFuncs::isFinite(result) || result <= 0.0) { - throw std::runtime_error("Bad variance scale " + - core::CStringUtils::typeToString(result)); - } +TDoubleWeightsAry seasonalVarianceScaleWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleSeasonalVarianceScaleWeight] = weight; return result; } -TDouble10Vec countVarianceScale(std::size_t dimension, - const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - TDouble10Vec result(dimension, 1.0); - detail::countVarianceScale(weightStyles, weights, result); - for (std::size_t i = 0u; i < dimension; ++i) { - if (!maths::CMathsFuncs::isFinite(result[i]) || result[i] <= 0.0) { - throw std::runtime_error("Bad count weight: [" + - core::CContainerPrinter::print(result) + "]"); - } - } +TDouble10VecWeightsAry seasonalVarianceScaleWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleSeasonalVarianceScaleWeight] = TDouble10Vec(dimension, weight); return result; } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - try { - return seasonalVarianceScale(weightStyles, weights) != 1.0; - 
} catch (const std::exception&) {} - return true; +void setSeasonalVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleSeasonalVarianceScaleWeight] = TDouble10Vec(dimension, weight); } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasSeasonalVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; +bool hasSeasonalVarianceScale(const TDoubleWeightsAry& weights) { + return weights[E_SampleSeasonalVarianceScaleWeight] != 1.0; } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - if (!detail::check(weightStyles, weights)) { - return false; - } - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - for (std::size_t j = 0u; j < weights[i].size(); ++j) { - if (weights[i][j] != 1.0) { - return true; - } - } - break; - case E_SampleCountVarianceScaleWeight: - break; - case E_SampleWinsorisationWeight: - break; - } - } - return false; +bool hasSeasonalVarianceScale(const TDoubleWeightsAry1Vec& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TDoubleWeightsAry& weight) { + return hasSeasonalVarianceScale(weight); + }); } -bool hasSeasonalVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasSeasonalVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; +TDoubleWeightsAry countVarianceScaleWeight(double weight) { + TDoubleWeightsAry result(CUnitWeights::UNIT); + result[E_SampleCountVarianceScaleWeight] = weight; + return result; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, const TDouble4Vec& weights) { - try { - return countVarianceScale(weightStyles, weights) != 1.0; - } catch (const std::exception&) {} - return true; +TDouble10VecWeightsAry countVarianceScaleWeight(double weight, std::size_t dimension) { + TDouble10VecWeightsAry result(CUnitWeights::unit(dimension)); + result[E_SampleCountVarianceScaleWeight] = TDouble10Vec(dimension, weight); + return result; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble4Vec1Vec& weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasCountVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; +void setCountVarianceScale(double weight, std::size_t dimension, TDouble10VecWeightsAry& weights) { + weights[E_SampleCountVarianceScaleWeight] = TDouble10Vec(dimension, weight); } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec& weights) { - if (!detail::check(weightStyles, weights)) { - return false; - } - for (std::size_t i = 0u; i < weightStyles.size(); ++i) { - switch (weightStyles[i]) { - case E_SampleCountWeight: - break; - case E_SampleSeasonalVarianceScaleWeight: - break; - case E_SampleCountVarianceScaleWeight: - for (std::size_t j = 0u; j < weights[i].size(); ++j) { - if (weights[i][j] != 1.0) { - return true; - } - } - break; - case E_SampleWinsorisationWeight: - break; - } - } - return false; +bool hasCountVarianceScale(const TDoubleWeightsAry& weights) { + return weights[E_SampleCountVarianceScaleWeight] != 1.0; } -bool hasCountVarianceScale(const TWeightStyleVec& weightStyles, - const TDouble10Vec4Vec1Vec& 
weights) { - for (std::size_t i = 0u; i < weights.size(); ++i) { - if (hasCountVarianceScale(weightStyles, weights[i])) { - return true; - } - } - return false; +bool hasCountVarianceScale(const TDoubleWeightsAry1Vec& weights) { + return std::any_of(weights.begin(), weights.end(), [](const TDoubleWeightsAry& weight) { + return hasCountVarianceScale(weight); + }); } } } diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index 9661b0275a..83ea6655cf 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -48,8 +48,7 @@ using TDoubleVec = std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; using TDouble2Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble2Vec4VecVec = std::vector; +using TDouble2VecWeightsAryVec = std::vector; using TTimeDouble2VecSizeTr = core::CTriple; using TTimeDouble2VecSizeTrVec = std::vector; using TErrorBarVec = std::vector; @@ -257,7 +256,8 @@ void CForecastTest::testNonNegative() { //TDoubleVec uy; core_t::TTime time{0}; - TDouble2Vec4VecVec weights{{{1.0}}}; + std::vector weights{ + maths_t::CUnitWeights::unit(1)}; for (std::size_t d = 0u; d < 20; ++d) { TDoubleVec noise; rng.generateNormalSamples(2.0, 3.0, 48, noise); @@ -266,7 +266,6 @@ void CForecastTest::testNonNegative() { params.integer(false) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); double y{std::max(*value, 0.0)}; @@ -358,14 +357,10 @@ void CForecastTest::testFinancialIndex() { std::size_t n{5 * timeseries.size() / 6}; - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; for (std::size_t i = 0u; i < n; ++i) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples( params, {core::make_triple(timeseries[i].first, TDouble2Vec{timeseries[i].second}, TAG)}); @@ -466,18 +461,14 @@ void CForecastTest::test(TTrend trend, &controllers); core_t::TTime time{0}; - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; for (std::size_t d = 0u; d < daysToLearn; ++d) { TDoubleVec noise; rng.generateNormalSamples(0.0, noiseVariance, 86400 / bucketLength, noise); for (std::size_t i = 0u; i < noise.size(); ++i, time += bucketLength) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); double yi{trend(time, noise[i])}; model.addSamples(params, {core::make_triple(time, TDouble2Vec{yi}, TAG)}); //actual.push_back(yi); diff --git a/lib/maths/unittest/CGammaRateConjugateTest.cc b/lib/maths/unittest/CGammaRateConjugateTest.cc index 153c6568c4..8fe923c8f8 100644 --- a/lib/maths/unittest/CGammaRateConjugateTest.cc +++ b/lib/maths/unittest/CGammaRateConjugateTest.cc @@ -50,6 +50,7 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CGammaRateConjugate = 
CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); CGammaRateConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, @@ -66,7 +67,7 @@ void CGammaRateConjugateTest::testMultipleUpdate() { // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. - const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; + const maths_t::EDataType dataTypes[]{maths_t::E_IntegerData, maths_t::E_ContinuousData}; const double shape = 2.0; const double scale = 3.0; @@ -81,7 +82,7 @@ void CGammaRateConjugateTest::testMultipleUpdate() { CGammaRateConjugate filter2(filter1); for (std::size_t j = 0; j < samples.size(); ++j) { - filter1.addSamples(TDouble1Vec(1, samples[j])); + filter1.addSamples(TDouble1Vec{samples[j]}); } filter2.addSamples(samples); @@ -100,13 +101,13 @@ void CGammaRateConjugateTest::testMultipleUpdate() { filter1.addSamples(samples); CGammaRateConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({scaledSamples[j]}, + {ml::maths_t::countVarianceScaleWeight(2.0)}); } - filter2.addSamples(weightStyle, scaledSamples, - TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(scaledSamples, maths_t::TDoubleWeightsAry1Vec( + scaledSamples.size(), + maths_t::countVarianceScaleWeight(2.0))); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.03); @@ -123,11 +124,9 @@ void CGammaRateConjugateTest::testMultipleUpdate() { std::size_t count = 10; for (std::size_t j = 0u; j < count; ++j) { - filter1.addSamples(TDouble1Vec(1, x)); + filter1.addSamples(TDouble1Vec{x}); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_RelativeTolerance, 0.01); @@ -315,17 +314,14 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { rng.generateGammaSamples(shape, scale, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 0.9, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({1000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -474,13 +470,12 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 4e-3); } { - maths_t::TWeightStyleVec 
weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); TMeanAccumulator totalError; for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { TMeanAccumulator error; double vs = varianceScales[i]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); LOG_DEBUG(<< "*** vs = " << vs << " ***"); for (std::size_t j = 0u; j < boost::size(percentages); ++j) { boost::math::gamma_distribution<> scaledGamma(shape / vs, vs * scale); @@ -488,8 +483,8 @@ void CGammaRateConjugateTest::testMarginalLikelihood() { scaledGamma, (50.0 - percentages[j] / 2.0) / 100.0); double q2 = boost::math::quantile( scaledGamma, (50.0 + percentages[j] / 2.0) / 100.0); - TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], weightStyle, weight); + TDoubleDoublePr interval = + filter.marginalLikelihoodConfidenceInterval(percentages[j], weight); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.4); @@ -578,22 +573,19 @@ void CGammaRateConjugateTest::testMarginalLikelihoodMode() { filter.addSamples(samples); TMeanAccumulator relativeError; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); boost::math::gamma_distribution<> scaledGamma(shapes[i] / vs, vs * scales[j]); double expectedMode = boost::math::mode(scaledGamma); - LOG_DEBUG(<< "marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight) << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), - 0.28 * expectedMode + 0.3); - double error = std::fabs( - filter.marginalLikelihoodMode(weightStyle, weight) - expectedMode); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, + filter.marginalLikelihoodMode(weight), + 0.28 * expectedMode + 0.3); + double error = std::fabs(filter.marginalLikelihoodMode(weight) - expectedMode); relativeError.add(error == 0.0 ? 
0.0 : error / expectedMode); } LOG_DEBUG(<< "relativeError = " << maths::CBasicStatistics::mean(relativeError)); @@ -778,7 +770,6 @@ void CGammaRateConjugateTest::testCdf() { filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountWeight); double lowerBound; double upperBound; CPPUNIT_ASSERT(!filter.minusLogJointCdf(TDouble1Vec(), lowerBound, upperBound)); @@ -876,11 +867,9 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble1Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -890,42 +879,52 @@ void CGammaRateConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); 
filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1276,9 +1275,7 @@ void CGammaRateConjugateTest::testPersist() { maths::CGammaRateConjugate origFilter(makePrior(maths_t::E_ContinuousData, 0.1)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); @@ -1339,10 +1336,11 @@ void CGammaRateConjugateTest::testVarianceScale() { // Finally, we test update with scaled samples produces the // correct posterior. - maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; + TWeightFunc weightsFuncs[]{ + static_cast(maths_t::seasonalVarianceScaleWeight), + static_cast(maths_t::countVarianceScaleWeight)}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { const double shape = 3.0; const double scale = 3.0; @@ -1416,10 +1414,8 @@ void CGammaRateConjugateTest::testVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), - lowerBound, upperBound, tail)); + maths_t::E_TwoSided, {scaledSamples[j]}, + {weightsFuncs[s](varianceScales[i])}, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1478,12 +1474,10 @@ void CGammaRateConjugateTest::testVarianceScale() { for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL( - maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + {scaledSamples[j]}, + {weightsFuncs[s](varianceScales[i])}, logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1504,7 +1498,7 @@ void CGammaRateConjugateTest::testVarianceScale() { const double maximumMeanMeanError[] = {0.01, 0.01}; const double maximumMeanVarianceError[] = {0.08, 0.05}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { const double shapes[] = {1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; @@ -1512,9 +1506,8 @@ void CGammaRateConjugateTest::testVarianceScale() { 1000.0, 100000.0, 1000000.0}; const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; - maths_t::TWeightStyleVec weightStyle(1, scales[s]); TDoubleVec samples; - TDouble4Vec1Vec weights; + maths_t::TDoubleWeightsAry1Vec weights; test::CRandomNumbers rng; @@ -1553,13 +1546,13 @@ void CGammaRateConjugateTest::testVarianceScale() { 
rng.generateGammaSamples(shape, 1.0 / rate, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, 1.0)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), maths_t::CUnitWeights::UNIT); + filter.addSamples(samples, weights); rng.generateGammaSamples(scaledShape, 1.0 / scaledRate, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, scale)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), weightsFuncs[s](scale)); + filter.addSamples(samples, weights); double estimatedMean = filter.likelihoodShape() / filter.likelihoodRate(); diff --git a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc index 8b64f11ea7..1776abcae2 100644 --- a/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc +++ b/lib/maths/unittest/CLogNormalMeanPrecConjugateTest.cc @@ -51,6 +51,7 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); CLogNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& offset = 0.0, @@ -84,7 +85,7 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { CLogNormalMeanPrecConjugate filter2(filter1); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(TDouble1Vec(1, samples[j])); + filter1.addSamples(TDouble1Vec{samples[j]}); } filter2.addSamples(samples); @@ -112,13 +113,12 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { filter1.addSamples(samples); CLogNormalMeanPrecConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); + maths_t::TDoubleWeightsAry1Vec weights; + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0)); for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({scaledSamples[j]}, {weights[j]}); } - filter2.addSamples(weightStyle, scaledSamples, - TDouble4Vec1Vec(scaledSamples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(scaledSamples, weights); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -135,13 +135,10 @@ void CLogNormalMeanPrecConjugateTest::testMultipleUpdate() { double x = 3.0; std::size_t count = 10; - for (std::size_t j = 0u; j < count; ++j) { - filter1.addSamples(TDouble1Vec(1, x)); + filter1.addSamples(TDouble1Vec{x}); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -335,17 +332,14 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for 
(std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({10000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -515,9 +509,7 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihood() { double q2 = boost::math::quantile( scaledLogNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble4Vec(1, vs)); + percentages[j], maths_t::countVarianceScaleWeight(vs)); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, @@ -618,12 +610,11 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { rng.generateLogNormalSamples(locations[i], squareScales[j], 1000, samples); filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); TMeanAccumulator error; for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); double shift = std::log(1.0 + vs * (std::exp(squareScales[j]) - 1.0)) - squareScales[j]; double shiftedLocation = locations[i] - 0.5 * shift; @@ -631,18 +622,15 @@ void CLogNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { boost::math::lognormal_distribution<> scaledLogNormal( shiftedLocation, std::sqrt(shiftedSquareScale)); double expectedMode = boost::math::mode(scaledLogNormal); - LOG_DEBUG(<< "dm = " - << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) - << ", vs = " - << boost::math::variance(scaledLogNormal) / - boost::math::variance(logNormal) - << ", marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) - << ", expectedMode = " << expectedMode); + LOG_DEBUG( + << "dm = " << boost::math::mean(scaledLogNormal) - boost::math::mean(logNormal) + << ", vs = " + << boost::math::variance(scaledLogNormal) / boost::math::variance(logNormal) + << ", marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight) + << ", expectedMode = " << expectedMode); CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); - error.add(std::fabs(filter.marginalLikelihoodMode(weightStyle, weight) - - expectedMode)); + expectedMode, filter.marginalLikelihoodMode(weight), 1.0); + error.add(std::fabs(filter.marginalLikelihoodMode(weight) - expectedMode)); } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.26); @@ -910,11 +898,9 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, 
vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -924,42 +910,52 @@ void CLogNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1340,9 +1336,7 @@ void CLogNormalMeanPrecConjugateTest::testPersist() { maths::CLogNormalMeanPrecConjugate origFilter(makePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, 
maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1403,10 +1397,11 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { // Finally, we test update with scaled samples produces the // correct posterior. - maths_t::ESampleWeightStyle scales[] = {maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; + TWeightFunc weightsFuncs[]{ + static_cast(maths_t::seasonalVarianceScaleWeight), + static_cast(maths_t::countVarianceScaleWeight)}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { const double location = 2.0; const double squareScale = 1.5; { @@ -1497,10 +1492,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), - lowerBound, upperBound, tail)); + maths_t::E_TwoSided, {scaledSamples[k]}, + {weightsFuncs[s](varianceScales[j])}, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; probabilities.push_back(probability); @@ -1578,12 +1571,10 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { for (std::size_t j = 0u; j < scaledSamples.size(); ++j) { double logLikelihood = 0.0; - CPPUNIT_ASSERT_EQUAL( - maths_t::E_FpNoErrors, - filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, scales[s]), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); + CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, + filter.jointLogMarginalLikelihood( + {scaledSamples[j]}, + {weightsFuncs[s](varianceScales[i])}, logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1604,7 +1595,7 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { const double maximumMeanMeanError[] = {0.02, 0.01}; const double maximumMeanVarianceError[] = {0.18, 0.1}; - for (std::size_t s = 0u; s < boost::size(scales); ++s) { + for (std::size_t s = 0u; s < boost::size(weightsFuncs); ++s) { for (std::size_t t = 0u; t < boost::size(dataTypes); ++t) { const double means[] = {0.1, 1.0, 10.0, 100.0, 1000.0, 100000.0, 1000000.0}; @@ -1612,9 +1603,8 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { 1000.0, 100000.0, 1000000.0}; const double varianceScales[] = {0.1, 0.5, 1.0, 2.0, 10.0, 100.0}; - maths_t::TWeightStyleVec weightStyle(1, scales[s]); TDoubleVec samples; - TDouble4Vec1Vec weights; + maths_t::TDoubleWeightsAry1Vec weights; test::CRandomNumbers rng; @@ -1679,13 +1669,13 @@ void CLogNormalMeanPrecConjugateTest::testVarianceScale() { rng.generateLogNormalSamples(location, squareScale, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, 1.0)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), maths_t::CUnitWeights::UNIT); + filter.addSamples(samples, weights); rng.generateLogNormalSamples( scaledLocation, scaledSquareScale, 200, samples); weights.clear(); - weights.resize(samples.size(), TDouble4Vec(1, scale)); - filter.addSamples(weightStyle, samples, weights); + weights.resize(samples.size(), weightsFuncs[s](scale)); + filter.addSamples(samples, weights); boost::math::lognormal_distribution<> logNormal( filter.normalMean(), diff --git 
a/lib/maths/unittest/CMathsMemoryTest.cc b/lib/maths/unittest/CMathsMemoryTest.cc index 9ea016a6a5..0990705291 100644 --- a/lib/maths/unittest/CMathsMemoryTest.cc +++ b/lib/maths/unittest/CMathsMemoryTest.cc @@ -52,41 +52,36 @@ void CMathsMemoryTest::testPriors() { CConstantPrior constantPrior(d); CPPUNIT_ASSERT_EQUAL(std::size_t(0), constantPrior.memoryUsage()); - CGammaRateConjugate::TWeightStyleVec weightStyles; CGammaRateConjugate::TDoubleVec samples; - CGammaRateConjugate::TDoubleVecVec weights; - - weightStyles.push_back(maths_t::E_SampleCountWeight); samples.push_back(0.996); - CGammaRateConjugate::TDoubleVec weight; - weight.push_back(0.2); - weights.push_back(weight); + maths_t::TDoubleWeightsAry weight(maths_t::countWeight(0.2)); + maths_t::TDoubleWeightsAry1Vec weights{weight}; CGammaRateConjugate gammaRateConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7); CPPUNIT_ASSERT_EQUAL(std::size_t(0), gammaRateConjugate.memoryUsage()); - gammaRateConjugate.addSamples(weightStyles, samples, weights); + gammaRateConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), gammaRateConjugate.memoryUsage()); CLogNormalMeanPrecConjugate logNormalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage()); - logNormalConjugate.addSamples(weightStyles, samples, weights); + logNormalConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), logNormalConjugate.memoryUsage()); CPoissonMeanConjugate poissonConjugate(0.0, 0.8, 0.7, 0.3); CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage()); - poissonConjugate.addSamples(weightStyles, samples, weights); + poissonConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), poissonConjugate.memoryUsage()); CNormalMeanPrecConjugate normalConjugate(maths_t::E_ContinuousData, 0.0, 0.9, 0.8, 0.7, 0.2); CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage()); - normalConjugate.addSamples(weightStyles, samples, weights); + normalConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), normalConjugate.memoryUsage()); CMultinomialConjugate multinomialConjugate; CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage()); - multinomialConjugate.addSamples(weightStyles, samples, weights); + multinomialConjugate.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(std::size_t(0), multinomialConjugate.memoryUsage()); CXMeansOnline1d clusterer(maths_t::E_ContinuousData, @@ -120,7 +115,7 @@ void CMathsMemoryTest::testPriors() { std::size_t initialMultimodalPriorSize = multimodalPrior.memoryUsage(); - multimodalPrior.addSamples(weightStyles, samples, weights); + multimodalPrior.addSamples(samples, weights); CPPUNIT_ASSERT(initialMultimodalPriorSize < multimodalPrior.memoryUsage()); core::CMemoryUsage mem; diff --git a/lib/maths/unittest/CModelTest.cc b/lib/maths/unittest/CModelTest.cc index f0d5293c23..84b649fcf8 100644 --- a/lib/maths/unittest/CModelTest.cc +++ b/lib/maths/unittest/CModelTest.cc @@ -29,6 +29,9 @@ void CModelTest::testAll() { // Test that the various parameter classes work as expected. 
+ using TDouble2Vec = maths_t::TDouble2Vec; + using TDouble2VecWeightsAryVec = std::vector; + { core_t::TTime bucketLength{600}; double learnRate{0.5}; @@ -46,43 +49,37 @@ void CModelTest::testAll() { CPPUNIT_ASSERT_EQUAL(0.2, params.probabilityBucketEmpty()); } { - maths::CModelAddSamplesParams::TDouble2Vec weight1(2, 0.4); - maths::CModelAddSamplesParams::TDouble2Vec weight2(2, 0.7); - maths::CModelAddSamplesParams::TDouble2Vec4Vec weights1(1, weight1); - maths::CModelAddSamplesParams::TDouble2Vec4Vec weights2(1, weight2); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec trendWeights(1, weights1); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec priorWeights(1, weights2); + maths_t::TDouble2VecWeightsAry weight1(maths_t::CUnitWeights::unit(2)); + maths_t::TDouble2VecWeightsAry weight2(maths_t::CUnitWeights::unit(2)); + maths_t::setSeasonalVarianceScale(TDouble2Vec(2, 0.4), weight1); + maths_t::setSeasonalVarianceScale(TDouble2Vec(2, 0.7), weight2); + TDouble2VecWeightsAryVec trendWeights{weight1}; + TDouble2VecWeightsAryVec priorWeights{weight2}; maths::CModelAddSamplesParams params; - params.integer(true) - .propagationInterval(1.5) - .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE) - .trendWeights(trendWeights) - .priorWeights(priorWeights); + params.integer(true).propagationInterval(1.5).trendWeights(trendWeights).priorWeights(priorWeights); CPPUNIT_ASSERT_EQUAL(maths_t::E_IntegerData, params.type()); CPPUNIT_ASSERT_EQUAL(1.5, params.propagationInterval()); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::SEASONAL_VARIANCE), - core::CContainerPrinter::print(params.weightStyles())); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(trendWeights), core::CContainerPrinter::print(params.trendWeights())); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(priorWeights), core::CContainerPrinter::print(params.priorWeights())); } { - maths::CModelProbabilityParams::TDouble2Vec weight1(2, 0.4); - maths::CModelProbabilityParams::TDouble2Vec weight2(2, 0.7); - maths::CModelProbabilityParams::TDouble2Vec4Vec weights1(1, weight1); - maths::CModelProbabilityParams::TDouble2Vec4Vec weights2(1, weight2); + maths_t::TDouble2VecWeightsAry weight1(maths_t::CUnitWeights::unit(2)); + maths_t::TDouble2VecWeightsAry weight2(maths_t::CUnitWeights::unit(2)); + maths_t::setCountVarianceScale(TDouble2Vec(2, 0.4), weight1); + maths_t::setCountVarianceScale(TDouble2Vec(2, 0.7), weight2); + TDouble2VecWeightsAryVec weights{weight1, weight2}; maths::CModelProbabilityParams params; CPPUNIT_ASSERT(!params.mostAnomalousCorrelate()); CPPUNIT_ASSERT(params.coordinates().empty()); params.addCalculation(maths_t::E_OneSidedAbove) .addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(50.0) - .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{true, true}) - .addBucketEmpty(maths::CModelProbabilityParams::TBool2Vec{false, true}) - .weightStyles(maths::CConstantWeights::COUNT_VARIANCE) - .addWeights(weights1) - .addWeights(weights2) + .addBucketEmpty({true, true}) + .addBucketEmpty({false, true}) + .addWeights(weight1) + .addWeights(weight2) .mostAnomalousCorrelate(1) .addCoordinate(1) .addCoordinate(0); @@ -92,9 +89,7 @@ void CModelTest::testAll() { CPPUNIT_ASSERT_EQUAL(50.0, params.seasonalConfidenceInterval()); CPPUNIT_ASSERT_EQUAL(std::string("[[true, true], [false, true]]"), core::CContainerPrinter::print(params.bucketEmpty())); - CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(maths::CConstantWeights::COUNT_VARIANCE), - 
core::CContainerPrinter::print(params.weightStyles())); - CPPUNIT_ASSERT_EQUAL(std::string("[[[0.4, 0.4]], [[0.7, 0.7]]]"), + CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(weights), core::CContainerPrinter::print(params.weights())); CPPUNIT_ASSERT_EQUAL(std::size_t(1), *params.mostAnomalousCorrelate()); CPPUNIT_ASSERT_EQUAL(std::string("[1, 0]"), diff --git a/lib/maths/unittest/CMultimodalPriorTest.cc b/lib/maths/unittest/CMultimodalPriorTest.cc index d61cdd3153..db4968784b 100644 --- a/lib/maths/unittest/CMultimodalPriorTest.cc +++ b/lib/maths/unittest/CMultimodalPriorTest.cc @@ -58,6 +58,7 @@ using CLogNormalMeanPrecConjugate = CPriorTestInterfaceMixin<maths::CLogNormalMeanPrecConjugate>; using CMultimodalPrior = CPriorTestInterfaceMixin<maths::CMultimodalPrior>; using COneOfNPrior = CPriorTestInterfaceMixin<maths::COneOfNPrior>; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); //! Make the default mode prior. COneOfNPrior makeModePrior(const double& decayRate = 0.0) { @@ -723,17 +724,14 @@ void CMultimodalPriorTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, 100, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast<TWeightFunc>(maths_t::countWeight), + static_cast<TWeightFunc>(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 20000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({20000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -919,30 +917,24 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() { CMultimodalPrior filter(makePrior()); filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); - TDouble4Vec1Vec weights(1, weight); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); std::size_t totalCount = 0u; for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; - weight[0] = vs; - weights[0][0] = vs; + maths_t::setCountVarianceScale(vs, weight); LOG_DEBUG(<< "*** vs = " << vs << " ***"); - double mode = filter.marginalLikelihoodMode(weightStyle, weight); + double mode = filter.marginalLikelihoodMode(weight); LOG_DEBUG(<< "marginalLikelihoodMode = " << mode); // Should be near 8.
- CPPUNIT_ASSERT_DOUBLES_EQUAL( - 8.0, filter.marginalLikelihoodMode(weightStyle, weight), 2.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(8.0, filter.marginalLikelihoodMode(weight), 2.0); double eps = 0.01; double modeMinusEps = mode - eps; double modePlusEps = mode + eps; double fMode, fModeMinusEps, fModePlusEps; - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, mode), weights, fMode); - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modeMinusEps), - weights, fModeMinusEps); - filter.jointLogMarginalLikelihood(weightStyle, TDouble1Vec(1, modePlusEps), - weights, fModePlusEps); + filter.jointLogMarginalLikelihood({mode}, {weight}, fMode); + filter.jointLogMarginalLikelihood({modeMinusEps}, {weight}, fModeMinusEps); + filter.jointLogMarginalLikelihood({modePlusEps}, {weight}, fModePlusEps); fMode = std::exp(fMode); fModeMinusEps = std::exp(fModeMinusEps); fModePlusEps = std::exp(fModePlusEps); @@ -959,8 +951,7 @@ void CMultimodalPriorTest::testMarginalLikelihoodMode() { TDoubleVec fTrials; for (std::size_t j = 0u; j < trials.size(); ++j) { double fTrial; - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, trials[j]), weights, fTrial); + filter.jointLogMarginalLikelihood({trials[j]}, {weight}, fTrial); fTrial = std::exp(fTrial); if (fTrial > fMode) { LOG_DEBUG(<< "f(" << trials[j] << ") = " << fTrial << " > " << fMode); @@ -1064,7 +1055,8 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() { CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.05); } - LOG_DEBUG(<< "Problem Case (Issue 439)") { + LOG_DEBUG(<< "Problem Case (Issue 439)"); + { std::ifstream file; file.open("testfiles/poorly_conditioned_multimodal.txt"); std::ostringstream state; @@ -1080,9 +1072,9 @@ void CMultimodalPriorTest::testMarginalLikelihoodConfidenceInterval() { maths::CPriorStateSerialiser restorer; CPPUNIT_ASSERT(restorer(params, prior, traverser)); TDoubleDoublePr median = prior->marginalLikelihoodConfidenceInterval( - 0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + 0, maths_t::CUnitWeights::UNIT); TDoubleDoublePr i90 = prior->marginalLikelihoodConfidenceInterval( - 90, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + 90, maths_t::CUnitWeights::UNIT); LOG_DEBUG(<< "median = " << maths::CBasicStatistics::mean(median)); LOG_DEBUG(<< "confidence interval = " << core::CContainerPrinter::print(i90)); @@ -1358,20 +1350,17 @@ void CMultimodalPriorTest::testProbabilityOfLessLikelySamples() { double lb, ub; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, 49.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {49.0}, + maths_t::CUnitWeights::SINGLE_UNIT, + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, 54.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {54.0}, + maths_t::CUnitWeights::SINGLE_UNIT, + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, 59.0), TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail); + 
filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, {59.0}, + maths_t::CUnitWeights::SINGLE_UNIT, + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } { @@ -1651,12 +1640,10 @@ void CMultimodalPriorTest::testLargeValues() { clusterer, modePrior, 0.001); for (auto value : values) { - - multimodalPrior.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, value), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0 / 3.0))); + multimodalPrior.addSamples({value}, {maths_t::countWeight(1.0 / 3.0)}); if (!multimodalPrior.isNonInformative()) { TDoubleDoublePr interval = multimodalPrior.marginalLikelihoodConfidenceInterval( - 95.0, maths::CConstantWeights::COUNT, maths::CConstantWeights::UNIT); + 95.0, maths_t::CUnitWeights::UNIT); if (interval.second - interval.first >= 3e11) { LOG_DEBUG(<< "interval = " << interval.second - interval.first); LOG_DEBUG(<< multimodalPrior.print()); @@ -1697,9 +1684,7 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { rng.generateNormalSamples(mean3, variance3, 100, samples3); double varianceScales[] = {0.2, 0.5, 1.0, 2.0, 5.0}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); - TDouble4Vec1Vec weights(1, weight); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); double m; double v; @@ -1725,37 +1710,36 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { for (std::size_t i = 0u; i < boost::size(varianceScales); ++i) { double vs = varianceScales[i]; - weight[0] = vs; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); LOG_DEBUG(<< "*** variance scale = " << vs << " ***"); double Z; - filter.expectation(C1dUnitKernel(), 50, Z, weightStyle, weight); + filter.expectation(C1dUnitKernel(), 50, Z, weight); LOG_DEBUG(<< "Z = " << Z); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, Z, 1e-3); - LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weightStyle, weight)); + LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weight)); double expectationVariance; filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 50, expectationVariance, weightStyle, weight); + 50, expectationVariance, weight); LOG_DEBUG(<< "expectationVariance = " << expectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL(vs * unscaledExpectationVariance, expectationVariance, 1e-3 * vs * unscaledExpectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( - filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, - 1e-3 * filter.marginalLikelihoodVariance(weightStyle, weight)); + filter.marginalLikelihoodVariance(weight), expectationVariance, + 1e-3 * filter.marginalLikelihoodVariance(weight)); TDouble1Vec sample(1, 0.0); for (std::size_t j = 0u; j < boost::size(points); ++j) { TDouble1Vec x(1, points[j]); double fx; - filter.jointLogMarginalLikelihood(weightStyle, x, weights, fx); + filter.jointLogMarginalLikelihood(x, {weight}, fx); TDouble1Vec xMinusEps(1, points[j] - 1e-3); TDouble1Vec xPlusEps(1, points[j] + 1e-3); double lb, ub; - filter.minusLogJointCdf(weightStyle, xPlusEps, weights, lb, ub); + filter.minusLogJointCdf(xPlusEps, {weight}, lb, ub); double FxPlusEps = std::exp(-(lb + ub) / 2.0); - filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); + filter.minusLogJointCdf(xMinusEps, {weight}, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); LOG_DEBUG(<< "x = " << points[j] << ", log(f(x)) = " << fx << ", log(dF/dx)) = " << std::log((FxPlusEps - FxMinusEps) / 2e-3)); @@ -1763,22 +1747,21 @@ void 
CMultimodalPriorTest::testSeasonalVarianceScale() { 0.05 * std::fabs(fx)); sample[0] = m + (points[j] - m) / std::sqrt(vs); - weights[0][0] = 1.0; + maths_t::setSeasonalVarianceScale(1.0, weight); double expectedLowerBound; double expectedUpperBound; maths_t::ETail expectedTail; - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, - expectedLowerBound, expectedUpperBound, expectedTail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, sample, + {weight}, expectedLowerBound, + expectedUpperBound, expectedTail); sample[0] = points[j]; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); double lowerBound; double upperBound; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, sample, weights, - lowerBound, upperBound, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, {weight}, lowerBound, upperBound, tail); LOG_DEBUG(<< "expectedLowerBound = " << expectedLowerBound); LOG_DEBUG(<< "lowerBound = " << lowerBound); @@ -1813,9 +1796,9 @@ void CMultimodalPriorTest::testSeasonalVarianceScale() { rng.random_shuffle(samples.begin(), samples.end()); CMultimodalPrior filter(makePrior()); - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(weightStyle, TDouble1Vec(1, samples[j]), weights); + filter.addSamples({samples[j]}, {weight}); } double sm = filter.marginalLikelihoodMean(); @@ -1862,9 +1845,7 @@ void CMultimodalPriorTest::testPersist() { maths::CMultimodalPrior origFilter(maths_t::E_ContinuousData, clusterer, modePrior); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); diff --git a/lib/maths/unittest/CMultinomialConjugateTest.cc b/lib/maths/unittest/CMultinomialConjugateTest.cc index e40cc1be05..f67aef5c6d 100644 --- a/lib/maths/unittest/CMultinomialConjugateTest.cc +++ b/lib/maths/unittest/CMultinomialConjugateTest.cc @@ -509,24 +509,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
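In the multinomial hunks below, the count weight is the effective number of observations of a category, so the P values in the trailing comments on each addSamples line are simply that weight divided by the 100000 total (10000 / 100000 = 0.10 and so on). A short sketch of the collapsed call form, using only names visible in this diff together with the surrounding test locals:

    // One weighted update per category; the weight is the pseudo-count.
    filter.addSamples({categories[0]}, {maths_t::countWeight(10000.0)}); // P = 0.10
    filter.addSamples({categories[1]}, {maths_t::countWeight(12000.0)}); // P = 0.12
    // An unweighted sample uses the shared single unit weight instead.
    filter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT);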
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 10000.0))); // P = 0.10 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 12000.0))); // P = 0.12 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 29000.0))); // P = 0.29 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06 + filter.addSamples({categories[0]}, {maths_t::countWeight(10000.0)}); // P = 0.10 + filter.addSamples({categories[1]}, {maths_t::countWeight(12000.0)}); // P = 0.12 + filter.addSamples({categories[2]}, {maths_t::countWeight(29000.0)}); // P = 0.29 + filter.addSamples({categories[3]}, {maths_t::countWeight(39000.0)}); // P = 0.39 + filter.addSamples({categories[4]}, {maths_t::countWeight(4000.0)}); // P = 0.04 + filter.addSamples({categories[5]}, {maths_t::countWeight(6000.0)}); // P = 0.06 // We expect the following probabilities for each category: // P(1.1) = 0.20 @@ -557,24 +545,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 11000.0))); // P = 0.11 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 11000.0))); // P = 0.11 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 29000.0))); // P = 0.29 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 + filter.addSamples({categories[0]}, {maths_t::countWeight(11000.0)}); // P = 0.11 + filter.addSamples({categories[1]}, {maths_t::countWeight(11000.0)}); // P = 0.11 + filter.addSamples({categories[2]}, {maths_t::countWeight(29000.0)}); // P = 0.29 + filter.addSamples({categories[3]}, {maths_t::countWeight(39000.0)}); // P = 0.39 + filter.addSamples({categories[4]}, {maths_t::countWeight(5000.0)}); // P = 0.05 + filter.addSamples({categories[5]}, {maths_t::countWeight(5000.0)}); // P = 0.05 // We expect the following probabilities for each category: // P(1.1) = P(1.2) = 0.32 @@ -602,24 +578,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 15000.0))); // P = 0.15 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 15000.0))); // P = 0.15 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 30000.0))); // P = 0.30 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 30000.0))); // P = 0.30 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 5000.0))); // P = 0.05 + filter.addSamples({categories[0]}, {maths_t::countWeight(15000.0)}); // P = 0.15 + filter.addSamples({categories[1]}, {maths_t::countWeight(15000.0)}); // P = 0.15 + filter.addSamples({categories[2]}, {maths_t::countWeight(30000.0)}); // P = 0.30 + filter.addSamples({categories[3]}, {maths_t::countWeight(30000.0)}); // P = 0.30 + filter.addSamples({categories[4]}, {maths_t::countWeight(5000.0)}); // P = 0.05 + filter.addSamples({categories[5]}, {maths_t::countWeight(5000.0)}); // P = 0.05 // We expect the following probabilities for each category: // P(1.1) = P(1.2) = 0.40 @@ -711,24 +675,12 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter(CMultinomialConjugate::nonInformativePrior(6u)); // Large update limit. 
- filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 10000.0))); // P = 0.10 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 12000.0))); // P = 0.12 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[2]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 29000.0))); // P = 0.29 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[3]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 39000.0))); // P = 0.39 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[4]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 4000.0))); // P = 0.04 - filter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[5]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 6000.0))); // P = 0.06 + filter.addSamples({categories[0]}, {maths_t::countWeight(10000.0)}); // P = 0.10 + filter.addSamples({categories[1]}, {maths_t::countWeight(12000.0)}); // P = 0.12 + filter.addSamples({categories[2]}, {maths_t::countWeight(29000.0)}); // P = 0.29 + filter.addSamples({categories[3]}, {maths_t::countWeight(39000.0)}); // P = 0.39 + filter.addSamples({categories[4]}, {maths_t::countWeight(4000.0)}); // P = 0.04 + filter.addSamples({categories[5]}, {maths_t::countWeight(6000.0)}); // P = 0.06 double expectedProbabilities[] = {0.2, 0.32, 0.61, 1.0, 0.04, 0.1}; @@ -826,10 +778,8 @@ void CMultinomialConjugateTest::testProbabilityOfLessLikelySamples() { CMultinomialConjugate filter( CMultinomialConjugate::nonInformativePrior(categories.size())); for (std::size_t i = 0u; i < categories.size(); ++i) { - filter.addSamples( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, categories[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, rawProbabilities[i] * 100.0))); + filter.addSamples({categories[i]}, + {maths_t::countWeight(rawProbabilities[i] * 100.0)}); } TDoubleVec lowerBounds, upperBounds; @@ -962,9 +912,7 @@ void CMultinomialConjugateTest::testPersist() { rng.generateMultinomialSamples(categories, probabilities, 100, samples); maths::CMultinomialConjugate origFilter(CMultinomialConjugate::nonInformativePrior(5)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); diff --git a/lib/maths/unittest/CMultivariateConstantPriorTest.cc b/lib/maths/unittest/CMultivariateConstantPriorTest.cc index abaa4eecb5..9bfc92b97d 100644 --- a/lib/maths/unittest/CMultivariateConstantPriorTest.cc +++ b/lib/maths/unittest/CMultivariateConstantPriorTest.cc @@ -33,19 +33,6 @@ using namespace ml; using namespace handy_typedefs; -namespace { - -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); - -TDouble10Vec4Vec unitWeight(std::size_t dimension) { - return TDouble10Vec4Vec(1, TDouble10Vec(dimension, 1.0)); -} - -TDouble10Vec4Vec1Vec singleUnitWeight(std::size_t dimension) { - return TDouble10Vec4Vec1Vec(1, unitWeight(dimension)); -} -} - void CMultivariateConstantPriorTest::testAddSamples() { LOG_DEBUG(<< 
"+--------------------------------------------------+"); LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testAddSamples |"); @@ -57,26 +44,20 @@ void CMultivariateConstantPriorTest::testAddSamples() { double wrongDimension[] = {1.3, 2.1, 7.9}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(wrongDimension), - boost::end(wrongDimension))), - singleUnitWeight(3)); + filter.addSamples({TDouble10Vec(boost::begin(wrongDimension), boost::end(wrongDimension))}, + maths_t::CUnitWeights::singleUnit(3)); CPPUNIT_ASSERT(filter.isNonInformative()); double nans[] = {1.3, std::numeric_limits::quiet_NaN()}; - filter.addSamples( - COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(nans), boost::end(nans))), - singleUnitWeight(3)); + filter.addSamples({TDouble10Vec(boost::begin(nans), boost::end(nans))}, + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT(filter.isNonInformative()); double constant[] = {1.4, 1.0}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), - boost::end(constant))), - singleUnitWeight(2)); + filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT(!filter.isNonInformative()); } @@ -96,42 +77,38 @@ void CMultivariateConstantPriorTest::testMarginalLikelihood() { double likelihood; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpFailed, filter.jointLogMarginalLikelihood( - COUNT_WEIGHT, TDouble10Vec1Vec(), - singleUnitWeight(2), likelihood)); CPPUNIT_ASSERT_EQUAL( maths_t::E_FpFailed, filter.jointLogMarginalLikelihood( - COUNT_WEIGHT, + {}, maths_t::CUnitWeights::singleUnit(2), likelihood)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpFailed, + filter.jointLogMarginalLikelihood( TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2), likelihood)); + maths_t::CUnitWeights::singleUnit(2), likelihood)); CPPUNIT_ASSERT_EQUAL( maths_t::E_FpOverflowed, filter.jointLogMarginalLikelihood( - COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2), likelihood)); + {TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(2), likelihood)); CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds::lowest(), likelihood); - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), + filter.addSamples(TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2)); + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT_EQUAL( maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood( - COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), boost::end(constant))), - singleUnitWeight(2), likelihood)); + {TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(2), likelihood)); CPPUNIT_ASSERT_EQUAL(std::log(boost::numeric::bounds::highest()), likelihood); CPPUNIT_ASSERT_EQUAL( maths_t::E_FpOverflowed, filter.jointLogMarginalLikelihood( - COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(different), boost::end(different))), - singleUnitWeight(2), likelihood)); + {TDouble10Vec(boost::begin(different), boost::end(different))}, + maths_t::CUnitWeights::singleUnit(2), likelihood)); CPPUNIT_ASSERT_EQUAL(boost::numeric::bounds::lowest(), likelihood); } @@ -149,10 +126,8 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMean() { 
core::CContainerPrinter::print(filter.marginalLikelihoodMean())); double constant[] = {1.2, 6.0, 14.1}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), - boost::end(constant))), - singleUnitWeight(3)); + filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(3)); CPPUNIT_ASSERT_EQUAL(std::string("[1.2, 6, 14.1]"), core::CContainerPrinter::print(filter.marginalLikelihoodMean())); @@ -170,17 +145,15 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodMode() { CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()), core::CContainerPrinter::print(filter.marginalLikelihoodMode( - COUNT_WEIGHT, unitWeight(4)))); + maths_t::CUnitWeights::unit(4)))); double constant[] = {1.1, 6.5, 12.3, 14.1}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), - boost::end(constant))), - singleUnitWeight(4)); + filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(4)); CPPUNIT_ASSERT_EQUAL(core::CContainerPrinter::print(filter.marginalLikelihoodMean()), core::CContainerPrinter::print(filter.marginalLikelihoodMode( - COUNT_WEIGHT, unitWeight(4)))); + maths_t::CUnitWeights::unit(4)))); } void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() { @@ -207,10 +180,8 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() { } double constant[] = {1.1, 6.5, 12.3, 14.1}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(boost::begin(constant), - boost::end(constant))), - singleUnitWeight(4)); + filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(2)); covariance = filter.marginalLikelihoodCovariance(); CPPUNIT_ASSERT_EQUAL(std::size_t(4), covariance.size()); @@ -223,13 +194,9 @@ void CMultivariateConstantPriorTest::testMarginalLikelihoodCovariance() { } void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateConstantPriorTest::testSampleMarginalLikelihood " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); + LOG_DEBUG(<< "| CMultivariateConstantPriorTest::testSampleMarginalLikelihood |"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); // Check we get zero samples for non-informative and sample of the // constant otherwise. 
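The CMultivariateConstantPriorTest hunks swap the file-local COUNT_WEIGHT, unitWeight and singleUnitWeight helpers removed further up for the shared maths_t::CUnitWeights helpers. A sketch of how they correspond, using only calls visible in this diff; the dimension argument is meant to match the sample's dimension, so the 4-component constant in the covariance hunk above would be expected to take singleUnit(4) rather than singleUnit(2):

    // unit(d): one unit weights array for a d-dimensional sample
    // (old local equivalent: unitWeight(d)).
    filter.marginalLikelihoodMode(maths_t::CUnitWeights::unit(2));
    // singleUnit(d): a one-element vector holding that array, for one sample
    // (old local equivalent: singleUnitWeight(d)).
    filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))},
                      maths_t::CUnitWeights::singleUnit(2));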
@@ -242,10 +209,8 @@ void CMultivariateConstantPriorTest::testSampleMarginalLikelihood() { double constant[] = {1.2, 4.1}; - filter.addSamples(COUNT_WEIGHT, - TDouble10Vec1Vec(2, TDouble10Vec(boost::begin(constant), - boost::end(constant))), - singleUnitWeight(2)); + filter.addSamples({TDouble10Vec(boost::begin(constant), boost::end(constant))}, + maths_t::CUnitWeights::singleUnit(2)); filter.sampleMarginalLikelihood(4, samples); CPPUNIT_ASSERT_EQUAL(std::size_t(4), samples.size()); @@ -274,25 +239,25 @@ void CMultivariateConstantPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, - samples[i], singleUnitWeight(2), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, samples[i], + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(1.0, lb); CPPUNIT_ASSERT_EQUAL(1.0, ub); LOG_DEBUG(<< "tail = " << core::CContainerPrinter::print(tail)); CPPUNIT_ASSERT_EQUAL(std::string("[0, 0]"), core::CContainerPrinter::print(tail)); } - filter.addSamples(COUNT_WEIGHT, samples[0], singleUnitWeight(2)); + filter.addSamples(samples[0], maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT(!filter.isNonInformative()); std::string expectedTails[] = {"[0, 0]", "[1, 2]", "[1, 2]"}; for (std::size_t i = 0u; i < boost::size(samples); ++i) { double lb, ub; maths::CMultivariateConstantPrior::TTail10Vec tail; - filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, - samples[i], singleUnitWeight(2), - lb, ub, tail); + filter.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, samples[i], + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, lb); CPPUNIT_ASSERT_EQUAL(i == 0 ? 1.0 : 0.0, ub); LOG_DEBUG(<< "tail = " << core::CContainerPrinter::print(tail)); diff --git a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc index 0d4b0f118f..4c5921abb2 100644 --- a/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc +++ b/lib/maths/unittest/CMultivariateMultimodalPriorTest.cc @@ -45,12 +45,6 @@ using TMean2Accumulator = maths::CBasicStatistics::SSampleMean::TAccum using TCovariances2 = maths::CBasicStatistics::SSampleCovariances; namespace { - -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight); -const TDouble10Vec UNIT_WEIGHT_2(2, 1.0); -const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2)); - template class CMultivariateMultimodalPriorForTest : public maths::CMultivariateMultimodalPrior { @@ -79,12 +73,11 @@ makePrior(maths_t::EDataType dataType, double decayRate = 0.0) { } void gaussianSamples(test::CRandomNumbers& rng, - std::size_t modes, - const std::size_t* n, + const TSizeVec& n, const double (*means)[2], const double (*covariances)[3], TDouble10Vec1Vec& samples) { - for (std::size_t i = 0u; i < modes; ++i) { + for (std::size_t i = 0u; i < n.size(); ++i) { TVector2 mean(means[i], means[i] + 2); TMatrix2 covariance(covariances[i], covariances[i] + 3); TDoubleVecVec samples_; @@ -173,14 +166,14 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { // Test that we get the same result updating once with a vector of 100 // samples of an R.V. versus updating individually 100 times. 
- const std::size_t n[] = {100}; + const TSizeVec n{100}; const double means[][2] = {{10.0, 20.0}}; const double covariances[][3] = {{3.0, 1.0, 2.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; @@ -193,13 +186,13 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { maths::CSampling::seed(); for (std::size_t j = 0; j < samples.size(); ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter1.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } maths::CSampling::seed(); - filter2.addSamples( - COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter2.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -213,17 +206,15 @@ void CMultivariateMultimodalPriorTest::testMultipleUpdate() { maths::CMultivariateMultimodalPrior<2> filter1(makePrior<2>(dataTypes[i])); maths::CMultivariateMultimodalPrior<2> filter2(filter1); - TDouble10Vec4Vec1Vec weights; - weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); - weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); + maths_t::TDouble10VecWeightsAry1Vec weights; + weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2)); + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2)); maths::CSampling::seed(); for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, weights[j]); - filter1.addSamples(VARIANCE_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, {weights[j]}); } maths::CSampling::seed(); - filter2.addSamples(VARIANCE_WEIGHT, samples, weights); + filter2.addSamples(samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -244,14 +235,14 @@ void CMultivariateMultimodalPriorTest::testPropagation() { const double eps = 1e-3; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); @@ -259,8 +250,9 @@ void CMultivariateMultimodalPriorTest::testPropagation() { maths::CMultivariateMultimodalPrior<2> filter( makePrior<2>(maths_t::E_ContinuousData, decayRate)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); double numberSamples = filter.numberSamples(); TDouble10Vec mean = filter.marginalLikelihoodMean(); @@ -309,19 +301,20 @@ void CMultivariateMultimodalPriorTest::testSingleMode() { maths::CSampling::seed(); - const std::size_t n[] = {500}; + const TSizeVec n{500}; const double means[][2] = {{20.0, 20.0}}; const double covariances[][3] = {{40.0, 10.0, 20.0}}; 
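For the multivariate priors the weights are built in bulk: a whole maths_t::TDouble10VecWeightsAry1Vec is sized up front, and countVarianceScaleWeight takes a second argument that here appears to be the sample dimension. A sketch assembled from the calls in the testPropagation and testMultipleUpdate hunks above (filter, filter2 and samples stand for the surrounding test locals; not compiled here):

    // One unit weight per 2-dimensional sample (testPropagation above).
    filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec(
                                   samples.size(), maths_t::CUnitWeights::unit(2)));
    // Mixed variance scales (testMultipleUpdate above): resize(n, value) only
    // fills the newly added elements, so the first half of the samples get
    // scale 1.5 and the second half scale 2.0.
    maths_t::TDouble10VecWeightsAry1Vec weights;
    weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2));
    weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2));
    filter2.addSamples(samples, weights);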
test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), filter.numberModes()); } } @@ -346,12 +339,12 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() { LOG_DEBUG(<< "Mixture Normals"); { - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{4.0, 1.0, 4.0}, {10.0, -4.0, 6.0}}; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); double w[] = {n[0] / static_cast(n[0] + n[1]), n[1] / static_cast(n[0] + n[1])}; @@ -372,11 +365,13 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData); filter1.addSamples( - COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + samples, + maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), maths_t::CUnitWeights::unit(2))); filter2.addSamples( - COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + samples, + maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), maths_t::CUnitWeights::unit(2))); CPPUNIT_ASSERT_EQUAL(std::size_t(2), filter1.numberModes()); @@ -388,14 +383,16 @@ void CMultivariateMultimodalPriorTest::testMultipleModes() { TDouble10Vec1Vec sample(1, samples[j]); double l1; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter1.jointLogMarginalLikelihood( - COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l1)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpNoErrors, + filter1.jointLogMarginalLikelihood( + sample, maths_t::CUnitWeights::singleUnit(2), l1)); loss1G.add(ll - l1); double l2; - CPPUNIT_ASSERT_EQUAL(maths_t::E_FpNoErrors, - filter2.jointLogMarginalLikelihood( - COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, l2)); + CPPUNIT_ASSERT_EQUAL( + maths_t::E_FpNoErrors, + filter2.jointLogMarginalLikelihood( + sample, maths_t::CUnitWeights::singleUnit(2), l2)); loss12.add(l2 - l1); } @@ -472,8 +469,8 @@ void CMultivariateMultimodalPriorTest::testSplitAndMerge() { LOG_DEBUG(<< "# samples = " << samples.size()); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); //pointsToDate.push_back(samples[j]); //if (pointsToDate.size() == subplotCounts[subplot]) @@ -578,9 +575,9 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), - TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); LOG_DEBUG(<< "# modes = " << filter.numberModes()); if (filter.numberModes() != 3) { 
continue; @@ -669,13 +666,9 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihood() { } void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); + LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean |"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); // Test that the marginal likelihood mean is close to the sample // mean for a multimodal distribution. @@ -684,14 +677,14 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { const double eps = 0.05; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); @@ -700,7 +693,8 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { TMeanAccumulator meanError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); expectedMean.add(samples[i]); if (i % 10 == 0) { @@ -722,13 +716,9 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMean() { } void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); + LOG_DEBUG(<< "| CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode |"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); // Test that the sample mode is close to the generating distribution mode. 
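// Sketch of the corresponding query-side change, assumed from the hunks that follow:
// the mode is requested with a weights array instead of a style vector, and non-unit
// weights are built with named helpers.
//
//   TDouble10Vec mode = filter.marginalLikelihoodMode(maths_t::CUnitWeights::unit(2));
//
//   maths_t::TDouble10VecWeightsAry1Vec weights;
//   weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2));
//   weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2));
//   filter.addSamples(samples, weights);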
@@ -760,19 +750,20 @@ void CMultivariateMultimodalPriorTest::testMarginalLikelihoodMode() { rng.random_shuffle(samples.begin(), samples.end()); CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), SINGLE_UNIT_WEIGHT_2[0])); - TDouble10Vec mode = - filter.marginalLikelihoodMode(COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); + TDouble10Vec mode = filter.marginalLikelihoodMode( + maths_t::CUnitWeights::unit(2)); TVector2 expectedMode; TMaxAccumulator maxLikelihood; for (std::size_t i = 0u; i < filter.modes().size(); ++i) { TDouble10Vec mi = (filter.modes())[i].s_Prior->marginalLikelihoodMode( - COUNT_WEIGHT, SINGLE_UNIT_WEIGHT_2[0]); + maths_t::CUnitWeights::unit(2)); double likelihood; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mi), - SINGLE_UNIT_WEIGHT_2, likelihood); + filter.jointLogMarginalLikelihood( + {mi}, maths_t::CUnitWeights::singleUnit(2), likelihood); if (maxLikelihood.add(likelihood)) { expectedMode = TVector2(mi); } @@ -833,8 +824,9 @@ void CMultivariateMultimodalPriorTest::testSampleMarginalLikelihood() { LOG_DEBUG(<< "# samples = " << samples.size()); maths::CMultivariateMultimodalPrior<2> filter(makePrior<2>(maths_t::E_ContinuousData)); - filter.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), TDouble10Vec4Vec(1, UNIT_WEIGHT_2))); + filter.addSamples(samples, maths_t::TDouble10VecWeightsAry1Vec( + samples.size(), + maths_t::CUnitWeights::unit(2))); TDouble10Vec1Vec sampled; filter.sampleMarginalLikelihood(300, sampled); @@ -939,7 +931,8 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() { CMultivariateMultimodalPriorForTest<2> filter(makePrior<2>(maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < samples.size(); ++k) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[k]}, + maths_t::CUnitWeights::singleUnit(2)); } LOG_DEBUG(<< "# modes = " << filter.numberModes()); @@ -961,9 +954,8 @@ void CMultivariateMultimodalPriorTest::testProbabilityOfLessLikelySamples() { double lb, ub; maths::CMultivariatePrior::TTail10Vec tail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, - TDouble10Vec1Vec(1, x.toVector()), - SINGLE_UNIT_WEIGHT_2, lb, ub, tail); + maths_t::E_TwoSided, {x.toVector()}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); double pa = (lb + ub) / 2.0; LOG_DEBUG(<< " p(" << x << "), actual = " << pa << ", expected = " << px); @@ -1031,8 +1023,8 @@ void CMultivariateMultimodalPriorTest::testLatLongData() { *modePrior); for (std::size_t i = 0u; i < timeseries.size(); ++i) { - filter->addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, timeseries[i].second), - SINGLE_UNIT_WEIGHT_2); + filter->addSamples({timeseries[i].second}, + maths_t::CUnitWeights::singleUnit(2)); filter->propagateForwardsByTime(1.0); } LOG_DEBUG(<< filter->print()); @@ -1067,14 +1059,14 @@ void CMultivariateMultimodalPriorTest::testPersist() { maths::CSampling::seed(); - std::size_t n[] = {100, 100}; + const TSizeVec n{100, 100}; const double means[][2] = {{10.0, 20.0}, {100.0, 30.0}}; const double covariances[][3] = {{3.0, 1.0, 2.0}, {60.0, 20.0, 70.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, 
n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); maths_t::EDataType dataType = maths_t::E_ContinuousData; @@ -1083,7 +1075,8 @@ void CMultivariateMultimodalPriorTest::testPersist() { maths::CMultivariateMultimodalPrior<2> origFilter(makePrior<2>(dataType)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + origFilter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } uint64_t checksum = origFilter.checksum(); diff --git a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc index 88846ad79d..2e5829a542 100644 --- a/lib/maths/unittest/CMultivariateNormalConjugateTest.cc +++ b/lib/maths/unittest/CMultivariateNormalConjugateTest.cc @@ -38,13 +38,6 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; namespace { - -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight); -const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0)); -const TDouble10Vec4Vec1Vec - SINGLE_UNIT_WEIGHT_2(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - void empiricalProbabilityOfLessLikelySamples(const TDoubleVec& mean, const TDoubleVecVec& covariance, TDoubleVec& result) { @@ -120,11 +113,12 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() { maths::CMultivariateNormalConjugate<2> filter2(filter1); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter1.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2); - filter2.addSamples(COUNT_WEIGHT, samples, weights); + maths_t::TDouble10VecWeightsAry1Vec weights( + samples.size(), maths_t::CUnitWeights::unit(2)); + filter2.addSamples(samples, weights); CPPUNIT_ASSERT(filter1.equalTolerance( filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5)); @@ -138,16 +132,13 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i])); maths::CMultivariateNormalConjugate<2> filter2(filter1); - TDouble10Vec4Vec1Vec weights; - weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); - weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); - + maths_t::TDouble10VecWeightsAry1Vec weights; + weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2)); + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2)); for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, weights[j]); - filter1.addSamples(VARIANCE_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, {weights[j]}); } - filter2.addSamples(VARIANCE_WEIGHT, samples, weights); + filter2.addSamples(samples, weights); CPPUNIT_ASSERT(filter1.equalTolerance( filter2, maths::CToleranceTypes::E_RelativeTolerance, 1e-5)); @@ -166,13 +157,11 @@ void CMultivariateNormalConjugateTest::testMultipleUpdate() { double x = 3.0; std::size_t count = 10; for (std::size_t j = 0u; j < count; ++j) { - filter1.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, x)), - SINGLE_UNIT_WEIGHT_2); + filter1.addSamples({TDouble10Vec(2, x)}, + 
maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec1Vec sample(1, TDouble10Vec(2, x)); - TDouble10Vec4Vec1Vec weight( - 1, TDouble10Vec4Vec(1, TDouble10Vec(2, static_cast(count)))); - filter2.addSamples(COUNT_WEIGHT, sample, weight); + filter2.addSamples({TDouble10Vec(2, x)}, + {maths_t::countWeight(static_cast(count), 2)}); CPPUNIT_ASSERT(filter1.equalTolerance( filter2, maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5)); @@ -207,8 +196,9 @@ void CMultivariateNormalConjugateTest::testPropagation() { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataTypes[i], 0.1)); - TDouble10Vec4Vec1Vec weights(samples.size(), UNIT_WEIGHT_2); - filter.addSamples(COUNT_WEIGHT, samples, weights); + maths_t::TDouble10VecWeightsAry1Vec weights( + samples.size(), maths_t::CUnitWeights::unit(2)); + filter.addSamples(samples, weights); TVector2 initialMean = filter.mean(); TMatrix2 initialPrecision = filter.precision(); @@ -275,8 +265,8 @@ void CMultivariateNormalConjugateTest::testMeanVectorEstimation() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior( maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); filter.propagateForwardsByTime(1.0); } @@ -380,8 +370,8 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior( maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); filter.propagateForwardsByTime(1.0); } @@ -389,7 +379,7 @@ void CMultivariateNormalConjugateTest::testPrecisionMatrixEstimation() { std::size_t n = 500; TMatrix2Vec precisionSamples; filter.randomSamplePrecisionMatrixPrior(n, precisionSamples); - TDouble10Vec4Vec elementSamples(3); + TDouble10Vec10Vec elementSamples(3); for (std::size_t j = 0; j < precisionSamples.size(); ++j) { elementSamples[0].push_back(precisionSamples[j](0, 0)); elementSamples[1].push_back(precisionSamples[j](1, 0)); @@ -483,7 +473,8 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() { TMeanAccumulator meanCovarianceError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -561,13 +552,9 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihood() { } void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CMultivariateNormalConjugateTest::testMarginalLikelihoodMode " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); + LOG_DEBUG(<< "| CMultivariateNormalConjugateTest::testMarginalLikelihoodMode |"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); // Test that the marginal likelihood mode is at a stationary maximum // of the likelihood 
function. @@ -583,11 +570,13 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } LOG_DEBUG(<< "prior = " << filter.print()); - TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2); + TDouble10Vec mode = + filter.marginalLikelihoodMode(maths_t::CUnitWeights::unit(2)); TDoubleVec epsilons; rng.generateUniformSamples(-0.01, 0.01, 10, epsilons); @@ -606,12 +595,12 @@ void CMultivariateNormalConjugateTest::testMarginalLikelihoodMode() { norm = std::sqrt(norm); double llm, ll, llp; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modeMinusEps, - SINGLE_UNIT_WEIGHT_2, llm); - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, TDouble10Vec1Vec(1, mode), - SINGLE_UNIT_WEIGHT_2, ll); - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, modePlusEps, - SINGLE_UNIT_WEIGHT_2, llp); + filter.jointLogMarginalLikelihood( + modeMinusEps, maths_t::CUnitWeights::singleUnit(2), llm); + filter.jointLogMarginalLikelihood( + {mode}, maths_t::CUnitWeights::singleUnit(2), ll); + filter.jointLogMarginalLikelihood( + modePlusEps, maths_t::CUnitWeights::singleUnit(2), llp); double gradient = std::fabs(std::exp(llp) - std::exp(llm)) / norm; LOG_DEBUG(<< "gradient = " << gradient); CPPUNIT_ASSERT(gradient < 1e-6); @@ -661,7 +650,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { core::CContainerPrinter::print(resamples[0])); } - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } TDoubleVec p; @@ -698,9 +688,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { TDoubleVec sampleProbabilities; for (std::size_t j = 0u; j < resamples.size(); ++j) { double ll; - filter.jointLogMarginalLikelihood(COUNT_WEIGHT, - TDouble10Vec1Vec(1, resamples[j]), - SINGLE_UNIT_WEIGHT_2, ll); + filter.jointLogMarginalLikelihood( + {resamples[j]}, maths_t::CUnitWeights::singleUnit(2), ll); sampleProbabilities.push_back( static_cast(std::lower_bound(p.begin(), p.end(), ll) - p.begin()) / static_cast(p.size())); @@ -715,7 +704,8 @@ void CMultivariateNormalConjugateTest::testSampleMarginalLikelihood() { pAbsError.add(error); pRelError.add(error / expectedProbability); } - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } LOG_DEBUG(<< "pAbsError = " << maths::CBasicStatistics::mean(pAbsError)); @@ -764,8 +754,8 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { maths::CMultivariateNormalConjugate<2>::nonInformativePrior( maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < samples.size(); ++k) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[k]}, + maths_t::CUnitWeights::singleUnit(2)); } TDoubleVec p; @@ -787,9 +777,8 @@ void CMultivariateNormalConjugateTest::testProbabilityOfLessLikelySamples() { double lb, ub; maths::CMultivariatePrior::TTail10Vec tail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, - TDouble10Vec1Vec(1, x.toVector()), - 
SINGLE_UNIT_WEIGHT_2, lb, ub, tail); + maths_t::E_TwoSided, {x.toVector()}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); double pa = (lb + ub) / 2.0; LOG_DEBUG(<< " p(" << x << "), actual = " << pa << ", expected = " << px); @@ -857,9 +846,11 @@ void CMultivariateNormalConjugateTest::testIntegerData() { for (std::size_t k = 0u; k < n; ++k) { TVector2 x(samples[k]); TDouble10Vec1Vec sample(1, x.toVector()); - filter1.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filter1.addSamples( + sample, maths_t::CUnitWeights::singleUnit(2)); sample[0] = (x + TVector2(uniform[k])).toVector(); - filter2.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filter2.addSamples( + sample, maths_t::CUnitWeights::singleUnit(2)); } CPPUNIT_ASSERT(filter1.equalTolerance( @@ -873,14 +864,14 @@ void CMultivariateNormalConjugateTest::testIntegerData() { TDouble10Vec1Vec sample(1, x.toVector()); double ll1; - filter1.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, - SINGLE_UNIT_WEIGHT_2, ll1); + filter1.jointLogMarginalLikelihood( + sample, maths_t::CUnitWeights::singleUnit(2), ll1); meanLogLikelihood1.add(-ll1); sample[0] = (x + TVector2(uniform[k])).toVector(); double ll2; - filter2.jointLogMarginalLikelihood(COUNT_WEIGHT, sample, - SINGLE_UNIT_WEIGHT_2, ll2); + filter2.jointLogMarginalLikelihood( + sample, maths_t::CUnitWeights::singleUnit(2), ll2); meanLogLikelihood2.add(-ll2); } @@ -904,8 +895,8 @@ void CMultivariateNormalConjugateTest::testLowVariationData() { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_IntegerData)); for (std::size_t i = 0u; i < 100; ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({TDouble10Vec(2, 430.0)}, + maths_t::CUnitWeights::singleUnit(2)); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); @@ -917,8 +908,8 @@ void CMultivariateNormalConjugateTest::testLowVariationData() { maths::CMultivariateNormalConjugate<2> filter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < 100; ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, TDouble10Vec(2, 430.0)), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({TDouble10Vec(2, 430.0)}, + maths_t::CUnitWeights::singleUnit(2)); } TDouble10Vec10Vec covariances = filter.marginalLikelihoodCovariance(); @@ -949,7 +940,8 @@ void CMultivariateNormalConjugateTest::testPersist() { maths::CMultivariateNormalConjugate<2> origFilter( maths::CMultivariateNormalConjugate<2>::nonInformativePrior(dataType)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + origFilter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1036,7 +1028,8 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() { TDouble10Vec1Vec sample(1, TDouble10Vec(2)); sample[0][0] = samples[i][indices[j][0]]; sample[0][1] = samples[i][indices[j][1]]; - filters[j].addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filters[j].addSamples( + sample, maths_t::CUnitWeights::singleUnit(2)); } } @@ -1052,9 +1045,9 @@ void CMultivariateNormalConjugateTest::calibrationExperiment() { sample[0][1] = samples[i][indices[j][1]]; double lb, ub; maths::CMultivariatePrior::TTail10Vec tail; - 
filters[j].probabilityOfLessLikelySamples(maths_t::E_TwoSided, COUNT_WEIGHT, - sample, SINGLE_UNIT_WEIGHT_2, - lb, ub, tail); + filters[j].probabilityOfLessLikelySamples( + maths_t::E_TwoSided, sample, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); p[j].push_back((lb + ub) / 2.0); mpi = std::min(mpi, (lb + ub) / 2.0); epi.add((lb + ub) / 2.0, 0.5); diff --git a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc index f1708b10a6..1f38671da2 100644 --- a/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc +++ b/lib/maths/unittest/CMultivariateOneOfNPriorTest.cc @@ -43,15 +43,11 @@ namespace { using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; +using TSizeVec = std::vector; using TPriorPtr = maths::CMultivariateOneOfNPrior::TPriorPtr; using TPriorPtrVec = maths::CMultivariateOneOfNPrior::TPriorPtrVec; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const maths_t::TWeightStyleVec VARIANCE_WEIGHT(1, maths_t::E_SampleCountVarianceScaleWeight); -const TDouble10Vec4Vec UNIT_WEIGHT_2(1, TDouble10Vec(2, 1.0)); -const TDouble10Vec4Vec1Vec SINGLE_UNIT_WEIGHT_2(1, UNIT_WEIGHT_2); - class CMinusLogLikelihood : public maths::CGradientDescent::CFunction { public: CMinusLogLikelihood(const maths::CMultivariateOneOfNPrior& prior) @@ -59,9 +55,9 @@ class CMinusLogLikelihood : public maths::CGradientDescent::CFunction { bool operator()(const maths::CGradientDescent::TVector& x, double& result) const { if (m_Prior->jointLogMarginalLikelihood( - COUNT_WEIGHT, - TDouble10Vec1Vec(1, TDouble10Vec(x.toVector())), - SINGLE_UNIT_WEIGHT_2, result) == maths_t::E_FpNoErrors) { + {x.toVector()}, + maths_t::CUnitWeights::singleUnit(2), + result) == maths_t::E_FpNoErrors) { result = -result; return true; } @@ -87,20 +83,18 @@ template maths::CMultivariateOneOfNPrior makeOneOfN(maths_t::EDataType dataType, double decayRate = 0.0) { TPriorPtrVec priors; - priors.push_back(TPriorPtr( - maths::CMultivariateNormalConjugate::nonInformativePrior(dataType, decayRate) - .clone())); - priors.push_back(TPriorPtr(makeMultimodal(dataType, decayRate).clone())); + priors.emplace_back(maths::CMultivariateNormalConjugate::nonInformativePrior(dataType, decayRate) + .clone()); + priors.emplace_back(makeMultimodal(dataType, decayRate).clone()); return maths::CMultivariateOneOfNPrior(N, priors, dataType, decayRate); } void gaussianSamples(test::CRandomNumbers& rng, - std::size_t modes, - const std::size_t* n, + const TSizeVec& n, const double (*means)[2], const double (*covariances)[3], TDouble10Vec1Vec& samples) { - for (std::size_t i = 0u; i < modes; ++i) { + for (std::size_t i = 0u; i < n.size(); ++i) { TVector2 mean(means[i], means[i] + 2); TMatrix2 covariance(covariances[i], covariances[i] + 3); TDoubleVecVec samples_; @@ -179,19 +173,18 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() { maths::CMultivariateOneOfNPrior filter2(filter1); for (std::size_t j = 0u; j < seedSamples.size(); ++j) { - TDouble10Vec1Vec sample(1, seedSamples[j]); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter1.addSamples(COUNT_WEIGHT, sample, weight); - filter2.addSamples(COUNT_WEIGHT, sample, weight); + filter1.addSamples({seedSamples[j]}, + maths_t::CUnitWeights::singleUnit(2)); + filter2.addSamples({seedSamples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec 
sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter1.addSamples(COUNT_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), - TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter2.addSamples(COUNT_WEIGHT, samples, weights); + maths_t::TDouble10VecWeightsAry1Vec weights( + samples.size(), maths_t::CUnitWeights::unit(2)); + filter2.addSamples(samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -207,19 +200,16 @@ void CMultivariateOneOfNPriorTest::testMultipleUpdate() { for (std::size_t j = 0u; j < seedSamples.size(); ++j) { TDouble10Vec1Vec sample(1, seedSamples[j]); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - filter1.addSamples(COUNT_WEIGHT, sample, weight); - filter2.addSamples(COUNT_WEIGHT, sample, weight); + filter1.addSamples(sample, maths_t::CUnitWeights::singleUnit(2)); + filter2.addSamples(sample, maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights; - weights.resize(samples.size() / 2, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.5))); - weights.resize(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 2.0))); + maths_t::TDouble10VecWeightsAry1Vec weights; + weights.resize(samples.size() / 2, maths_t::countVarianceScaleWeight(1.5, 2)); + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0, 2)); for (std::size_t j = 0u; j < samples.size(); ++j) { - TDouble10Vec1Vec sample(1, samples[j]); - TDouble10Vec4Vec1Vec weight(1, weights[j]); - filter1.addSamples(VARIANCE_WEIGHT, sample, weight); + filter1.addSamples({samples[j]}, {weights[j]}); } - filter2.addSamples(VARIANCE_WEIGHT, samples, weights); + filter2.addSamples(samples, weights); LOG_DEBUG(<< "checksum 1 " << filter1.checksum()); LOG_DEBUG(<< "checksum 2 " << filter2.checksum()); @@ -240,14 +230,14 @@ void CMultivariateOneOfNPriorTest::testPropagation() { const double eps = 2e-3; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); LOG_DEBUG(<< "# samples = " << samples.size()); @@ -255,8 +245,8 @@ void CMultivariateOneOfNPriorTest::testPropagation() { maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData, decayRate)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), - TDouble10Vec4Vec1Vec(1, UNIT_WEIGHT_2)); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } double numberSamples = filter.numberSamples(); @@ -315,12 +305,12 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { test::CRandomNumbers rng; { - const std::size_t n[] = {100}; + const TSizeVec n{100}; const double mean[][2] = {{10.0, 20.0}}; const double covariance[][3] = {{3.0, 1.0, 2.0}}; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, mean, covariance, samples); + gaussianSamples(rng, n, mean, covariance, samples); using TEqual = maths::CEqualWithTolerance; TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-10); @@ -330,8 +320,8 @@ void 
CMultivariateOneOfNPriorTest::testWeightUpdate() { maths::CMultivariateOneOfNPrior filter( makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, sum(filter.weights()), 1e-6); filter.propagateForwardsByTime(1.0); CPPUNIT_ASSERT(equal(sum(filter.weights()), 1.0)); @@ -342,12 +332,12 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { { // Test that non-zero decay rate behaves as expected. - const std::size_t n[] = {4000, 6000}; + const TSizeVec n{4000, 6000}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); const double decayRates[] = {0.0008, 0.004, 0.02}; @@ -359,8 +349,8 @@ void CMultivariateOneOfNPriorTest::testWeightUpdate() { makeOneOfN<2>(maths_t::E_ContinuousData, decayRates[i])); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); filter.propagateForwardsByTime(1.0); } @@ -383,14 +373,14 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() { maths::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; - const std::size_t n[] = {400, 600}; + const TSizeVec n{400, 600}; const double means[][2] = {{10.0, 10.0}, {20.0, 20.0}}; const double covariances[][3] = {{8.0, 1.0, 8.0}, {20.0, -4.0, 10.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); const maths_t::EDataType dataTypes[] = {maths_t::E_IntegerData, maths_t::E_ContinuousData}; @@ -402,12 +392,11 @@ void CMultivariateOneOfNPriorTest::testModelUpdate() { makeMultimodal<2>(dataTypes[i]); maths::CMultivariateOneOfNPrior oneOfN(makeOneOfN<2>(dataTypes[i])); - normal.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); - multimodal.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); - oneOfN.addSamples(COUNT_WEIGHT, samples, - TDouble10Vec4Vec1Vec(samples.size(), UNIT_WEIGHT_2)); + maths_t::TDouble10VecWeightsAry1Vec weights( + samples.size(), maths_t::CUnitWeights::unit(2)); + normal.addSamples(samples, weights); + multimodal.addSamples(samples, weights); + oneOfN.addSamples(samples, weights); CPPUNIT_ASSERT_EQUAL(normal.checksum(), oneOfN.models()[0]->checksum()); CPPUNIT_ASSERT_EQUAL(multimodal.checksum(), oneOfN.models()[1]->checksum()); @@ -470,8 +459,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { TMeanAccumulator meanCovarianceError; for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); if (!filter.isNonInformative()) { TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -579,8 +568,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihood() { maths::CMultivariateOneOfNPrior 
filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } TDouble10Vec m = filter.marginalLikelihoodMean(); @@ -720,7 +709,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMean() { TMean2Accumulator expectedMean; for (std::size_t j = 0u; j < samples.size(); ++j) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[j]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[j]}, + maths_t::CUnitWeights::singleUnit(2)); expectedMean.add(TVector2(samples[j])); if (!filter.isNonInformative()) { @@ -770,7 +760,7 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { for (std::size_t i = 0u; i < boost::size(means); ++i) { for (std::size_t j = 0u; j < boost::size(covariances); ++j) { - std::size_t n[] = {100}; + const TSizeVec n{100}; const double mean[][2] = {{means[i][0], means[i][1]}}; const double covariance[][3] = { {covariances[i][0], covariances[i][1], covariances[i][2]}}; @@ -779,12 +769,12 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { << ", variance = " << covariance[0][0] << " ***"); TDouble10Vec1Vec samples; - gaussianSamples(rng, 1, n, mean, covariance, samples); + gaussianSamples(rng, n, mean, covariance, samples); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t k = 0u; k < samples.size(); ++k) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[k]), - SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[k]}, + maths_t::CUnitWeights::singleUnit(2)); } CMinusLogLikelihood likelihood(filter); @@ -797,7 +787,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { maths::CVector(mean[0], mean[0] + 2), likelihood, gradientOfLikelihood, expectedMode, likelihoods); - TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2); + TDouble10Vec mode = filter.marginalLikelihoodMode( + maths_t::CUnitWeights::unit(2)); LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode); @@ -813,7 +804,7 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { { LOG_DEBUG(<< "****** Multimodal ******"); - const std::size_t n[] = {100, 100}; + const TSizeVec n{100, 100}; const double means[][2] = { {10.0, 10.0}, {16.0, 18.0}, @@ -824,11 +815,12 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { }; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } CMinusLogLikelihood likelihood(filter); @@ -841,7 +833,8 @@ void CMultivariateOneOfNPriorTest::testMarginalLikelihoodMode() { maths::CVector(means[0], means[0] + 2), likelihood, gradientOfLikelihood, expectedMode, likelihoods); - TDouble10Vec mode = filter.marginalLikelihoodMode(COUNT_WEIGHT, UNIT_WEIGHT_2); + TDouble10Vec mode = filter.marginalLikelihoodMode( + maths_t::CUnitWeights::unit(2)); LOG_DEBUG(<< "marginalLikelihoodMode = " << core::CContainerPrinter::print(mode) << ", expectedMode = " << expectedMode); @@ 
-863,7 +856,7 @@ void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() { test::CRandomNumbers rng; - const std::size_t n[] = {50, 50}; + const TSizeVec n{50, 50}; const double means[][2] = { {10.0, 10.0}, {25.0, 25.0}, @@ -874,13 +867,14 @@ void CMultivariateOneOfNPriorTest::testSampleMarginalLikelihood() { }; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); for (std::size_t i = 0u; i < samples.size(); ++i) { - filter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + filter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); if (!filter.isNonInformative()) { TDoubleVec weights = filter.weights(); @@ -927,7 +921,7 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { test::CRandomNumbers rng; - const std::size_t n[] = {100, 100}; + const TSizeVec n{100, 100}; const double means[][2] = { {10.0, 10.0}, {16.0, 18.0}, @@ -938,7 +932,7 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { }; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, means, covariances, samples); + gaussianSamples(rng, n, means, covariances, samples); rng.random_shuffle(samples.begin(), samples.end()); maths::CMultivariateOneOfNPrior filter(makeOneOfN<2>(maths_t::E_ContinuousData)); @@ -947,12 +941,12 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t i = 0u; i < samples.size(); ++i) { TDouble10Vec1Vec sample(1, samples[i]); - filter.addSamples(COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2); + filter.addSamples(sample, maths_t::CUnitWeights::singleUnit(2)); double lowerBound, upperBound; maths::CMultivariatePrior::TTail10Vec tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, + maths_t::E_TwoSided, sample, maths_t::CUnitWeights::singleUnit(2), lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); @@ -966,7 +960,8 @@ void CMultivariateOneOfNPriorTest::testProbabilityOfLessLikelySamples() { double modelLowerBound, modelUpperBound; double weight = weights[j]; CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples( - maths_t::E_TwoSided, COUNT_WEIGHT, sample, SINGLE_UNIT_WEIGHT_2, + maths_t::E_TwoSided, sample, + maths_t::CUnitWeights::singleUnit(2), modelLowerBound, modelUpperBound, tail)); CPPUNIT_ASSERT_EQUAL(modelLowerBound, modelUpperBound); double modelProbability = (modelLowerBound + modelUpperBound) / 2.0; @@ -992,20 +987,21 @@ void CMultivariateOneOfNPriorTest::testPersist() { // Check that persist/restore is idempotent. 
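// The univariate tests below follow the same pattern with the scalar helpers; a short
// sketch assumed from their use in the following hunks:
//
//   filter.addSamples({x}, {maths_t::countWeight(static_cast<double>(count))});
//   filter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT);
//
//   maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT);
//   maths_t::setSeasonalVarianceScale(vs, weight);
//   filter.jointLogMarginalLikelihood({mode}, {weight}, fm);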
- const std::size_t n[] = {100}; + const TSizeVec n{100}; const double mean[][2] = {{10.0, 20.0}}; const double covariance[][3] = {{3.0, 1.0, 2.0}}; test::CRandomNumbers rng; TDouble10Vec1Vec samples; - gaussianSamples(rng, boost::size(n), n, mean, covariance, samples); + gaussianSamples(rng, n, mean, covariance, samples); maths_t::EDataType dataType = maths_t::E_ContinuousData; maths::CMultivariateOneOfNPrior origFilter(makeOneOfN<2>(dataType)); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(COUNT_WEIGHT, TDouble10Vec1Vec(1, samples[i]), SINGLE_UNIT_WEIGHT_2); + origFilter.addSamples({samples[i]}, + maths_t::CUnitWeights::singleUnit(2)); } std::size_t dimension = origFilter.dimension(); double decayRate = origFilter.decayRate(); diff --git a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc index b4bb54d0fd..df90f4caa5 100644 --- a/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc +++ b/lib/maths/unittest/CNormalMeanPrecConjugateTest.cc @@ -51,6 +51,7 @@ using TDoubleDoublePrVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin; +using TWeightFunc = maths_t::TDoubleWeightsAry (*)(double); CNormalMeanPrecConjugate makePrior(maths_t::EDataType dataType = maths_t::E_ContinuousData, const double& decayRate = 0.0) { @@ -100,13 +101,12 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate() { CNormalMeanPrecConjugate filter1(makePrior(dataTypes[i])); CNormalMeanPrecConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); + maths_t::TDoubleWeightsAry1Vec weights; + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0)); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({samples[j]}, {weights[j]}); } - filter2.addSamples(weightStyle, samples, - TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(samples, weights); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -126,9 +126,7 @@ void CNormalMeanPrecConjugateTest::testMultipleUpdate() { for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); TEqual equal(maths::CToleranceTypes::E_AbsoluteTolerance, 1e-5); CPPUNIT_ASSERT(filter1.equalTolerance(filter2, equal)); @@ -319,17 +317,14 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { rng.generateNormalSamples(mean, variance, 200, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - 
maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 1000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({1000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -490,9 +485,7 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihood() { double q2 = boost::math::quantile( scaledNormal, (50.0 + percentages[j] / 2.0) / 100.0); TDoubleDoublePr interval = filter.marginalLikelihoodConfidenceInterval( - percentages[j], - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble4Vec(1, vs)); + percentages[j], maths_t::countVarianceScaleWeight(vs)); LOG_DEBUG(<< "[q1, q2] = [" << q1 << ", " << q2 << "]" << ", interval = " << core::CContainerPrinter::print(interval)); CPPUNIT_ASSERT_DOUBLES_EQUAL(q1, interval.first, 0.3); @@ -590,34 +583,28 @@ void CNormalMeanPrecConjugateTest::testMarginalLikelihoodMode() { rng.generateNormalSamples(means[i], variances[j], 1000, samples); filter.addSamples(samples); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[i]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); boost::math::normal_distribution<> scaledNormal( means[i], std::sqrt(vs * variances[j])); double expectedMode = boost::math::mode(scaledNormal); - LOG_DEBUG(<< "marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " << filter.marginalLikelihoodMode(weight) << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), - 0.12 * std::sqrt(variances[j])); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, + filter.marginalLikelihoodMode(weight), + 0.12 * std::sqrt(variances[j])); } } } } void CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance() { - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); - LOG_DEBUG(<< "| " - "CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance " - " |"); - LOG_DEBUG(<< "+------------------------------------------------------------" - "----+"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); + LOG_DEBUG(<< "| CNormalMeanPrecConjugateTest::testMarginalLikelihoodVariance |"); + LOG_DEBUG(<< "+----------------------------------------------------------------+"); // Test that the expectation of the residual from the mean for // the marginal likelihood matches the expected variance of the @@ -865,11 +852,9 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -879,42 +864,52 @@ void CNormalMeanPrecConjugateTest::testProbabilityOfLessLikelySamples() { { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - 
TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, - TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1209,9 +1204,7 @@ void CNormalMeanPrecConjugateTest::testPersist() { maths::CNormalMeanPrecConjugate origFilter(makePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); @@ -1275,9 +1268,7 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { rng.generateNormalSamples(means[i], variances[j], 100, samples); double varianceScales[] = {0.2, 0.5, 1.0, 2.0, 5.0}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - 
TDouble4Vec weight(1, 1.0); - TDouble4Vec1Vec weights(1, weight); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); double m; double v; @@ -1300,37 +1291,32 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { for (std::size_t k = 0u; k < boost::size(varianceScales); ++k) { double vs = varianceScales[k]; - weight[0] = vs; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); LOG_DEBUG(<< "*** variance scale = " << vs << " ***"); double Z; - filter.expectation(C1dUnitKernel(), 50, Z, weightStyle, weight); + filter.expectation(C1dUnitKernel(), 50, Z, weight); LOG_DEBUG(<< "Z = " << Z); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, Z, 1e-3); - LOG_DEBUG(<< "sv = " - << filter.marginalLikelihoodVariance(weightStyle, weight)); + LOG_DEBUG(<< "sv = " << filter.marginalLikelihoodVariance(weight)); double expectationVariance; filter.expectation(CVarianceKernel(filter.marginalLikelihoodMean()), - 100, expectationVariance, weightStyle, weight); + 100, expectationVariance, weight); LOG_DEBUG(<< "expectationVariance = " << expectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( vs * unscaledExpectationVariance, expectationVariance, 0.01 * vs * unscaledExpectationVariance); CPPUNIT_ASSERT_DOUBLES_EQUAL( - filter.marginalLikelihoodVariance(weightStyle, weight), expectationVariance, - 0.01 * filter.marginalLikelihoodVariance(weightStyle, weight)); + filter.marginalLikelihoodVariance(weight), expectationVariance, + 0.01 * filter.marginalLikelihoodVariance(weight)); - double mode = filter.marginalLikelihoodMode(weightStyle, weight); + double mode = filter.marginalLikelihoodMode(weight); double fm; double fmMinusEps, fmPlusEps; - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, mode - 1e-3), weights, fmMinusEps); - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, mode), weights, fm); - filter.jointLogMarginalLikelihood( - weightStyle, TDouble1Vec(1, mode + 1e-3), weights, fmPlusEps); + filter.jointLogMarginalLikelihood({mode - 1e-3}, {weight}, fmMinusEps); + filter.jointLogMarginalLikelihood({mode}, {weight}, fm); + filter.jointLogMarginalLikelihood({mode + 1e-3}, {weight}, fmPlusEps); LOG_DEBUG(<< "log(f(mode)) = " << fm << ", log(f(mode - eps)) = " << fmMinusEps << ", log(f(mode + eps)) = " << fmPlusEps); CPPUNIT_ASSERT(fm > fmMinusEps); @@ -1341,13 +1327,13 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { for (std::size_t l = 0u; l < boost::size(points); ++l) { TDouble1Vec x(1, points[l]); double fx; - filter.jointLogMarginalLikelihood(weightStyle, x, weights, fx); + filter.jointLogMarginalLikelihood(x, {weight}, fx); TDouble1Vec xMinusEps(1, points[l] - 1e-3); TDouble1Vec xPlusEps(1, points[l] + 1e-3); double lb, ub; - filter.minusLogJointCdf(weightStyle, xPlusEps, weights, lb, ub); + filter.minusLogJointCdf(xPlusEps, {weight}, lb, ub); double FxPlusEps = std::exp(-(lb + ub) / 2.0); - filter.minusLogJointCdf(weightStyle, xMinusEps, weights, lb, ub); + filter.minusLogJointCdf(xMinusEps, {weight}, lb, ub); double FxMinusEps = std::exp(-(lb + ub) / 2.0); LOG_DEBUG(<< "x = " << points[l] << ", log(f(x)) = " << fx << ", F(x - eps) = " << FxMinusEps @@ -1358,22 +1344,22 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { 0.05 * std::fabs(fx)); sample[0] = m + (points[l] - m) / std::sqrt(vs); - weights[0][0] = 1.0; + maths_t::setSeasonalVarianceScale(1.0, weight); double expectedLowerBound; double expectedUpperBound; maths_t::ETail expectedTail; filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, 
weightStyle, sample, weights, + maths_t::E_TwoSided, sample, {weight}, expectedLowerBound, expectedUpperBound, expectedTail); sample[0] = points[l]; - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); double lowerBound; double upperBound; maths_t::ETail tail; - filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, sample, weights, - lowerBound, upperBound, tail); + filter.probabilityOfLessLikelySamples(maths_t::E_TwoSided, + sample, {weight}, lowerBound, + upperBound, tail); LOG_DEBUG(<< "expectedLowerBound = " << expectedLowerBound); LOG_DEBUG(<< "lowerBound = " << lowerBound); @@ -1405,9 +1391,9 @@ void CNormalMeanPrecConjugateTest::testSeasonalVarianceScale() { rng.random_shuffle(samples.begin(), samples.end()); CNormalMeanPrecConjugate filter(makePrior()); - weights[0][0] = vs; + maths_t::setSeasonalVarianceScale(vs, weight); for (std::size_t l = 0u; l < samples.size(); ++l) { - filter.addSamples(weightStyle, TDouble1Vec(1, samples[l]), weights); + filter.addSamples({samples[l]}, {weight}); } double sm = filter.marginalLikelihoodMean(); @@ -1516,10 +1502,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { double lowerBound, upperBound; maths_t::ETail tail; CPPUNIT_ASSERT(filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[k]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[j])), + maths_t::E_TwoSided, {scaledSamples[k]}, + {maths_t::countVarianceScaleWeight(varianceScales[j])}, lowerBound, upperBound, tail)); CPPUNIT_ASSERT_EQUAL(lowerBound, upperBound); double probability = (lowerBound + upperBound) / 2.0; @@ -1583,9 +1567,8 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { CPPUNIT_ASSERT_EQUAL( maths_t::E_FpNoErrors, filter.jointLogMarginalLikelihood( - maths_t::TWeightStyleVec(1, maths_t::E_SampleCountVarianceScaleWeight), - TDouble1Vec(1, scaledSamples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, varianceScales[i])), logLikelihood)); + {scaledSamples[j]}, + {maths_t::countVarianceScaleWeight(varianceScales[i])}, logLikelihood)); differentialEntropy -= logLikelihood; } @@ -1609,7 +1592,6 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { 85.0, 90.0, 95.0, 99.0}; unsigned int errors[] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); double variances[] = {1.0, 5.0}; double precision = 1 / variances[0]; @@ -1619,8 +1601,9 @@ void CNormalMeanPrecConjugateTest::testCountVarianceScale() { for (std::size_t i = 0u; i < boost::size(variances); ++i) { TDoubleVec samples; rng.generateNormalSamples(0.0, variances[i], 1000, samples); - TDouble4Vec1Vec weights(samples.size(), TDouble4Vec(1, variances[i])); - filter.addSamples(weightStyle, samples, weights); + filter.addSamples(samples, maths_t::TDoubleWeightsAry1Vec( + samples.size(), maths_t::countVarianceScaleWeight( + variances[i]))); } for (std::size_t i = 0; i < boost::size(testIntervals); ++i) { diff --git a/lib/maths/unittest/COneOfNPriorTest.cc b/lib/maths/unittest/COneOfNPriorTest.cc index 192c26931f..4560a98aa8 100644 --- a/lib/maths/unittest/COneOfNPriorTest.cc +++ b/lib/maths/unittest/COneOfNPriorTest.cc @@ -66,6 +66,7 @@ using CMultimodalPrior = CPriorTestInterfaceMixin; using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin; using COneOfNPrior = CPriorTestInterfaceMixin; using CPoissonMeanConjugate = CPriorTestInterfaceMixin; +using TWeightFunc = 
maths_t::TDoubleWeightsAry (*)(double); COneOfNPrior::TPriorPtrVec clone(const TPriorPtrVec& models, const TOptionalDouble& decayRate = TOptionalDouble()) { @@ -220,9 +221,7 @@ void COneOfNPriorTest::testMultipleUpdate() { for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } - filter2.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(static_cast(count))}); CPPUNIT_ASSERT_EQUAL(filter1.checksum(), filter2.checksum()); } @@ -581,17 +580,14 @@ void COneOfNPriorTest::testMarginalLikelihood() { rng.generateLogNormalSamples(location, squareScale, 10, samples); filter.addSamples(samples); - maths_t::ESampleWeightStyle weightStyles[] = { - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight}; - double weights[] = {0.1, 1.0, 10.0}; + TWeightFunc weightsFuncs[]{static_cast(maths_t::countWeight), + static_cast(maths_t::winsorisationWeight)}; + double weights[]{0.1, 1.0, 10.0}; - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (std::size_t i = 0u; i < boost::size(weightsFuncs); ++i) { for (std::size_t j = 0u; j < boost::size(weights); ++j) { double lb, ub; - filter.minusLogJointCdf( - maths_t::TWeightStyleVec(1, weightStyles[i]), TDouble1Vec(1, 10000.0), - TDouble4Vec1Vec(1, TDouble4Vec(1, weights[j])), lb, ub); + filter.minusLogJointCdf({10000.0}, {weightsFuncs[i](weights[j])}, lb, ub); LOG_DEBUG(<< "-log(c.d.f) = " << (lb + ub) / 2.0); CPPUNIT_ASSERT(lb >= 0.0); CPPUNIT_ASSERT(ub >= 0.0); @@ -1169,9 +1165,8 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { for (std::size_t j = 0u; j < weights.size(); ++j) { double weight = weights[j]; CPPUNIT_ASSERT(models[j]->probabilityOfLessLikelySamples( - maths_t::E_TwoSided, maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, sample[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0)), lb, ub, tail)); + maths_t::E_TwoSided, {sample[0]}, + maths_t::CUnitWeights::SINGLE_UNIT, lb, ub, tail)); CPPUNIT_ASSERT_EQUAL(lb, ub); double modelProbability = (lb + ub) / 2.0; expectedProbability += weight * modelProbability; @@ -1183,51 +1178,61 @@ void COneOfNPriorTest::testProbabilityOfLessLikelySamples() { CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedProbability, probability, 1e-3 * std::max(expectedProbability, probability)); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; ((i + 1) % 11 == 0) && k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); 
CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -1259,9 +1264,7 @@ void COneOfNPriorTest::testPersist() { maths::COneOfNPrior origFilter(clone(models), E_IntegerData); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); diff --git a/lib/maths/unittest/COrderingsTest.cc b/lib/maths/unittest/COrderingsTest.cc index 199cbc7fc4..fd5ce8ea8e 100644 --- a/lib/maths/unittest/COrderingsTest.cc +++ b/lib/maths/unittest/COrderingsTest.cc @@ -610,8 +610,7 @@ void COrderingsTest::testSimultaneousSort() { std::string expectedKeys("[0.2, 0.7, 1, 1.1, 5, 7]"); std::string expectedValues1("[~, ;, q, e, y, w]"); - std::string expectedValues2("[(1.3, 1.9), (1.2, 10.1), (2.1, 1.1), " - "(3.2, 12.9), (1.3, 6.2), (2, 1)]"); + std::string expectedValues2("[(1.3, 1.9), (1.2, 10.1), (2.1, 1.1), (3.2, 12.9), (1.3, 6.2), (2, 1)]"); maths::COrderings::simultaneousSort(keys, values1, values2); LOG_DEBUG(<< "keys = " << core::CContainerPrinter::print(keys)); @@ -641,9 +640,7 @@ void COrderingsTest::testSimultaneousSort() { LOG_DEBUG(<< "values3 = " << 
core::CContainerPrinter::print(values3)); std::string expectedKeys("[0.1, 0.7, 0.9, 1.4, 4, 5.1, 7.1, 80]"); std::string expectedValues1("[23, ;;, ~1, b4, pq, zz, a1, sss]"); - std::string expectedValues2("[(4.1, 1.1), (2.2, 1.1), (5.3, 3.9), " - "(7.2, 22.9), (10.3, 13.2), (0.3, 16.2), " - "(1, 1), (21.2, 11.1)]"); + std::string expectedValues2("[(4.1, 1.1), (2.2, 1.1), (5.3, 3.9), (7.2, 22.9), (10.3, 13.2), (0.3, 16.2), (1, 1), (21.2, 11.1)]"); maths::COrderings::simultaneousSort(keys, values1, values2, values3); LOG_DEBUG(<< "keys = " << core::CContainerPrinter::print(keys)); diff --git a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc index 83be61b488..fd92c23fcc 100644 --- a/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc +++ b/lib/maths/unittest/CPeriodicityHypothesisTestsTest.cc @@ -325,8 +325,7 @@ void CPeriodicityHypothesisTestsTest::testDiurnal() { if (time > lastTest + window) { maths::CPeriodicityHypothesisTestsResult result{hypotheses.test()}; CPPUNIT_ASSERT(result.print() == "{ 'weekend daily' 'weekday daily' }" || - result.print() == "{ 'weekend daily' 'weekday daily' " - "'weekend weekly' 'weekday weekly' }"); + result.print() == "{ 'weekend daily' 'weekday daily' 'weekend weekly' 'weekday weekly' }"); hypotheses = maths::CPeriodicityHypothesisTests(); hypotheses.initialize(HOUR, window, DAY); lastTest += window; diff --git a/lib/maths/unittest/CPoissonMeanConjugateTest.cc b/lib/maths/unittest/CPoissonMeanConjugateTest.cc index 5f71b0fc33..6efe84968f 100644 --- a/lib/maths/unittest/CPoissonMeanConjugateTest.cc +++ b/lib/maths/unittest/CPoissonMeanConjugateTest.cc @@ -92,13 +92,12 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() { CPoissonMeanConjugate filter1(CPoissonMeanConjugate::nonInformativePrior()); CPoissonMeanConjugate filter2(filter1); - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); + maths_t::TDoubleWeightsAry1Vec weights; + weights.resize(samples.size(), maths_t::countVarianceScaleWeight(2.0)); for (std::size_t j = 0u; j < samples.size(); ++j) { - filter1.addSamples(weightStyle, TDouble1Vec(1, samples[j]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 2.0))); + filter1.addSamples({samples[j]}, {weights[j]}); } - filter2.addSamples(weightStyle, samples, - TDouble4Vec1Vec(samples.size(), TDouble4Vec(1, 2.0))); + filter2.addSamples(samples, weights); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -118,8 +117,7 @@ void CPoissonMeanConjugateTest::testMultipleUpdate() { for (std::size_t j = 0u; j < count; ++j) { filter1.addSamples(TDouble1Vec(1, x)); } - filter2.addSamples(maths::CConstantWeights::COUNT, TDouble1Vec(1, x), - TDouble4Vec1Vec(1, TDouble4Vec(1, static_cast(count)))); + filter2.addSamples({x}, {maths_t::countWeight(10.0)}); LOG_DEBUG(<< filter1.print()); LOG_DEBUG(<< "vs"); @@ -389,18 +387,16 @@ void CPoissonMeanConjugateTest::testMarginalLikelihoodMode() { filter.addSamples(TDouble1Vec(1, static_cast(samples[j]))); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - TDouble4Vec weight(1, 1.0); + maths_t::TDoubleWeightsAry weight(maths_t::CUnitWeights::UNIT); for (std::size_t j = 0u; j < boost::size(varianceScales); ++j) { double vs = varianceScales[j]; - weight[0] = vs; + maths_t::setCountVarianceScale(vs, weight); double expectedMode = boost::math::mode(poisson); - LOG_DEBUG(<< "marginalLikelihoodMode = " - << filter.marginalLikelihoodMode(weightStyle, weight) + LOG_DEBUG(<< "marginalLikelihoodMode = " << 
filter.marginalLikelihoodMode(weight) << ", expectedMode = " << expectedMode); - CPPUNIT_ASSERT_DOUBLES_EQUAL( - expectedMode, filter.marginalLikelihoodMode(weightStyle, weight), 1.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedMode, + filter.marginalLikelihoodMode(weight), 1.0); } } } @@ -655,11 +651,9 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { meanError.add(std::fabs(px - (lb + ub) / 2.0)); } - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleCountVarianceScaleWeight); - for (std::size_t k = 0u; k < boost::size(vs); ++k) { - double mode = filter.marginalLikelihoodMode(weightStyle, - TDouble4Vec(1, vs[k])); + double mode = filter.marginalLikelihoodMode( + maths_t::countVarianceScaleWeight(vs[k])); double ss[] = {0.9 * mode, 1.1 * mode}; LOG_DEBUG(<< "vs = " << vs[k] << ", mode = " << mode); @@ -669,40 +663,52 @@ void CPoissonMeanConjugateTest::testProbabilityOfLessLikelySamples() { if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[0]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[0]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } if (mode > 0.0) { filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(1, ss[1]), - TDouble4Vec1Vec(1, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, {ss[1]}, + {maths_t::countVarianceScaleWeight(vs[k])}, lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_TwoSided, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_MixedOrNeitherTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedBelow, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedBelow, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_LeftTail, tail); filter.probabilityOfLessLikelySamples( - maths_t::E_OneSidedAbove, weightStyle, TDouble1Vec(ss, ss + 2), - TDouble4Vec1Vec(2, 
TDouble4Vec(1, vs[k])), lb, ub, tail); + maths_t::E_OneSidedAbove, TDouble1Vec(ss, ss + 2), + maths_t::TDoubleWeightsAry1Vec( + 2, maths_t::countVarianceScaleWeight(vs[k])), + lb, ub, tail); CPPUNIT_ASSERT_EQUAL(maths_t::E_RightTail, tail); } } @@ -911,9 +917,7 @@ void CPoissonMeanConjugateTest::testPersist() { maths::CPoissonMeanConjugate origFilter(CPoissonMeanConjugate::nonInformativePrior()); for (std::size_t i = 0u; i < samples.size(); ++i) { - origFilter.addSamples(maths_t::TWeightStyleVec(1, maths_t::E_SampleCountWeight), - TDouble1Vec(1, samples[i]), - TDouble4Vec1Vec(1, TDouble4Vec(1, 1.0))); + origFilter.addSamples({samples[i]}, maths_t::CUnitWeights::SINGLE_UNIT); } double decayRate = origFilter.decayRate(); uint64_t checksum = origFilter.checksum(); diff --git a/lib/maths/unittest/CPriorTest.cc b/lib/maths/unittest/CPriorTest.cc index 694689c401..3154231ae0 100644 --- a/lib/maths/unittest/CPriorTest.cc +++ b/lib/maths/unittest/CPriorTest.cc @@ -66,22 +66,19 @@ class CMinusLogLikelihood { public: CMinusLogLikelihood(const maths::CPrior& prior) - : m_Prior(&prior), m_WeightStyle(1, maths_t::E_SampleCountWeight), - m_X(1, 0.0), m_Weight(1, TDoubleVec(1, 1.0)) {} + : m_Prior(&prior), m_X(1, 0.0) {} bool operator()(const double& x, double& result) const { m_X[0] = x; - maths_t::EFloatingPointErrorStatus status = - m_Prior->jointLogMarginalLikelihood(m_WeightStyle, m_X, m_Weight, result); + maths_t::EFloatingPointErrorStatus status = m_Prior->jointLogMarginalLikelihood( + m_X, maths_t::CUnitWeights::SINGLE_UNIT, result); result = -result; return !(status & maths_t::E_FpFailed); } private: const maths::CPrior* m_Prior; - maths_t::TWeightStyleVec m_WeightStyle; mutable TDoubleVec m_X; - TDoubleVecVec m_Weight; }; } @@ -91,11 +88,10 @@ void CPriorTest::testExpectation() { LOG_DEBUG(<< "+-------------------------------+"); using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar::TAccumulator; - using CNormalMeanPrecConjugate = CPriorTestInterfaceMixin; test::CRandomNumbers rng; - CNormalMeanPrecConjugate prior( + maths::CNormalMeanPrecConjugate prior( maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)); TDoubleVec samples; @@ -103,7 +99,8 @@ void CPriorTest::testExpectation() { TMeanVarAccumulator moments; moments.add(samples); - prior.addSamples(samples); + prior.addSamples(samples, maths_t::TDoubleWeightsAry1Vec( + samples.size(), maths_t::CUnitWeights::UNIT)); double trueMean = maths::CBasicStatistics::mean(moments); LOG_DEBUG(<< "true mean = " << trueMean); diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index c2472af2fb..ba08a52e7d 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -1048,9 +1048,8 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { if (decomposition.addPoint(time, value)) { model.setToNonInformative(0.0, 0.01); } - model.addSamples(maths_t::TWeightStyleVec{maths_t::E_SampleCountWeight}, - TDoubleVec{decomposition.detrend(time, value, 70.0)}, - TDoubleVecVec{TDoubleVec(1, 1.0)}); + model.addSamples({decomposition.detrend(time, value, 70.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); @@ -1079,11 +1078,9 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { double lb, ub; maths_t::ETail tail; model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - 
maths_t::TWeightStyleVec{maths_t::E_SampleSeasonalVarianceScaleWeight}, - TDoubleVec{decomposition.detrend(time, value, 70.0)}, - TDoubleVecVec{TDoubleVec{ - std::max(decomposition.scale(time, variance, 70.0).second, 0.25)}}, + maths_t::E_TwoSided, {decomposition.detrend(time, value, 70.0)}, + {maths_t::seasonalVarianceScaleWeight( + std::max(decomposition.scale(time, variance, 70.0).second, 0.25))}, lb, ub, tail); double pScaled = (lb + ub) / 2.0; pMinScaled = std::min(pMinScaled, pScaled); @@ -1095,10 +1092,8 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { //probs.push_back(-std::log(pScaled)); model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, - maths_t::TWeightStyleVec(1, maths_t::E_SampleSeasonalVarianceScaleWeight), - TDoubleVec(1, decomposition.detrend(time, value, 70.0)), - TDoubleVecVec(1, TDoubleVec(1, 1.0)), lb, ub, tail); + maths_t::E_TwoSided, {decomposition.detrend(time, value, 70.0)}, + maths_t::CUnitWeights::SINGLE_UNIT, lb, ub, tail); double pUnscaled = (lb + ub) / 2.0; pMinUnscaled = std::min(pMinUnscaled, pUnscaled); } @@ -2219,17 +2214,13 @@ void CTimeSeriesDecompositionTest::testUpgrade() { LOG_DEBUG(<< "Saved state size = " << xml.size()); std::string values; - load("testfiles/" - "CTimeSeriesDecomposition.6.2.seasonal.expected_values.txt", - values); + load("testfiles/CTimeSeriesDecomposition.6.2.seasonal.expected_values.txt", values); LOG_DEBUG(<< "Expected values size = " << values.size()); TStrVec expectedValues; core::CStringUtils::tokenise(";", values, expectedValues, empty); std::string scales; - load("testfiles/" - "CTimeSeriesDecomposition.6.2.seasonal.expected_scales.txt", - scales); + load("testfiles/CTimeSeriesDecomposition.6.2.seasonal.expected_scales.txt", scales); LOG_DEBUG(<< "Expected scales size = " << scales.size()); TStrVec expectedScales; core::CStringUtils::tokenise(";", scales, expectedScales, empty); @@ -2276,24 +2267,18 @@ void CTimeSeriesDecompositionTest::testUpgrade() { LOG_DEBUG(<< "*** Trend and Seasonal Components ***"); { std::string xml; - load("testfiles/" - "CTimeSeriesDecomposition.6.2.trend_and_seasonal.state.xml", - xml); + load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.state.xml", xml); LOG_DEBUG(<< "Saved state size = " << xml.size()); std::string values; - load("testfiles/" - "CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_values." - "txt", + load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_values.txt", values); LOG_DEBUG(<< "Expected values size = " << values.size()); TStrVec expectedValues; core::CStringUtils::tokenise(";", values, expectedValues, empty); std::string scales; - load("testfiles/" - "CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_scales." 
- "txt", + load("testfiles/CTimeSeriesDecomposition.6.2.trend_and_seasonal.expected_scales.txt", scales); LOG_DEBUG(<< "Expected scales size = " << scales.size()); TStrVec expectedScales; diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index 1e4b79d08c..11df64f1f4 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -45,16 +45,14 @@ using TBool2Vec = core::CSmallVector; using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; using TDouble1Vec = core::CSmallVector; +using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec; using TDouble2Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; +using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; +using TDouble2VecWeightsAryVec = std::vector; using TDouble10Vec1Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; -using TDouble2Vec4VecVec = std::vector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; +using TDouble10VecWeightsAry1Vec = maths_t::TDouble10VecWeightsAry1Vec; using TSize1Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; using TTime2Vec1Vec = core::CSmallVector; @@ -66,6 +64,7 @@ using TMeanAccumulator2Vec = core::CSmallVector; using TDecompositionPtr = std::shared_ptr; using TDecompositionPtr10Vec = core::CSmallVector; using TDecayRateController2Ary = maths::CUnivariateTimeSeriesModel::TDecayRateController2Ary; +using TSetWeightsFunc = void (*)(double, std::size_t, TDouble2VecWeightsAry&); const double MINIMUM_SEASONAL_SCALE{0.25}; const double MINIMUM_SIGNIFICANT_CORRELATION{0.4}; @@ -161,8 +160,8 @@ void reinitializePrior(double learnRate, for (std::size_t i = 0u; i < value.second.size(); ++i) { detrended_[0][i] = trends[i]->detrend(value.first, value.second[i], 0.0); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended_, - {{TDouble10Vec(value.second.size(), learnRate)}}); + prior.addSamples(detrended_, + {maths_t::countWeight(learnRate, value.second.size())}); } if (controllers) { for (auto& trend : trends) { @@ -196,16 +195,11 @@ void CTimeSeriesModelTest::testClone() { TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -228,15 +222,11 @@ void CTimeSeriesModelTest::testClone() { TDoubleVecVec samples; rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (const auto& sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + 
params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -277,28 +267,22 @@ void CTimeSeriesModelTest::testMode() { for (auto sample : samples) { trend.addPoint(time, sample); TDouble1Vec sample_{trend.detrend(time, sample, 0.0)}; - prior.addSamples(maths::CConstantWeights::COUNT, sample_, - maths::CConstantWeights::SINGLE_UNIT); + prior.addSamples(sample_, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); time += bucketLength; } - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; time = 0; for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } double expectedMode{maths::CBasicStatistics::mean(trend.baseline(time)) + prior.marginalLikelihoodMode()}; - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(1))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode[0]); @@ -311,7 +295,6 @@ void CTimeSeriesModelTest::testMode() { TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - double learnRate{params(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; @@ -324,35 +307,28 @@ void CTimeSeriesModelTest::testMode() { time += bucketLength; } - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; time = 0; for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); if (trend.addPoint(time, sample)) { prior.setToNonInformative(0.0, DECAY_RATE); for (const auto& value : model.slidingWindow()) { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - {{learnRate}}); + prior.addSamples({trend.detrend(value.first, value.second, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } TDouble1Vec sample_{trend.detrend(time, sample, 0.0)}; - prior.addSamples(maths::CConstantWeights::COUNT, sample_, - maths::CConstantWeights::SINGLE_UNIT); + prior.addSamples(sample_, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); time += bucketLength; } double expectedMode{maths::CBasicStatistics::mean(trend.baseline(time)) + prior.marginalLikelihoodMode()}; - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, weight)); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(1))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode[0]); @@ -381,30 +357,25 @@ void 
CTimeSeriesModelTest::testMode() { trends[i]->addPoint(time, sample[i]); detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; time = 0; for (const auto& sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } TDouble2Vec expectedMode(prior.marginalLikelihoodMode( - maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); + maths_t::CUnitWeights::unit(3))); for (std::size_t i = 0u; i < trends.size(); ++i) { expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->baseline(time)); } - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(3))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode); @@ -443,15 +414,11 @@ void CTimeSeriesModelTest::testMode() { time += bucketLength; } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; time = 0; for (const auto& sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); bool reinitialize{false}; @@ -463,19 +430,18 @@ void CTimeSeriesModelTest::testMode() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); time += bucketLength; } TDouble2Vec expectedMode(prior.marginalLikelihoodMode( - maths::CConstantWeights::COUNT, maths::CConstantWeights::unit(3))); + maths_t::CUnitWeights::unit(3))); for (std::size_t i = 0u; i < trends.size(); ++i) { expectedMode[i] += maths::CBasicStatistics::mean(trends[i]->baseline(time)); } - TDouble2Vec mode(model.mode(time, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3))); + TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(3))); LOG_DEBUG(<< "expected mode = " << expectedMode); LOG_DEBUG(<< "mode = " << mode); @@ -503,24 +469,22 @@ void CTimeSeriesModelTest::testAddBucketValue() { core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG), - core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG), - }; - TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}, {{1.9}}}; + core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG)}; + TDoubleVec weights{1.0, 1.5, 0.9, 1.9}; + 
TDouble2VecWeightsAryVec modelWeights{ + maths_t::countWeight(TDouble2Vec{weights[0]}), + maths_t::countWeight(TDouble2Vec{weights[1]}), + maths_t::countWeight(TDouble2Vec{weights[2]}), + maths_t::countWeight(TDouble2Vec{weights[3]})}; for (std::size_t i = 0u; i < samples.size(); ++i) { - prior.addSamples(maths::CConstantWeights::COUNT, {samples[i].second[0]}, - {{weights[i][0][0]}}); + prior.addSamples({samples[i].second[0]}, {maths_t::countWeight(weights[i])}); } prior.propagateForwardsByTime(1.0); - prior.adjustOffset(maths::CConstantWeights::COUNT, {-1.0}, - maths::CConstantWeights::SINGLE_UNIT); + prior.adjustOffset({-1.0}, maths_t::CUnitWeights::SINGLE_UNIT); maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(modelWeights).priorWeights(modelWeights); model.addSamples(params, samples); model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)}); @@ -550,27 +514,30 @@ void CTimeSeriesModelTest::testAddSamples() { core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG)}; - TDouble2Vec4VecVec weights{{{1.0}}, {{1.5}}, {{0.9}}}; + TDoubleVec weights{1.0, 1.5, 0.9}; + TDouble2VecWeightsAryVec modelWeights{ + maths_t::countWeight(TDouble2Vec{weights[0]}), + maths_t::countWeight(TDouble2Vec{weights[1]}), + maths_t::countWeight(TDouble2Vec{weights[2]})}; maths::CModelAddSamplesParams params; params.integer(false) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + .trendWeights(modelWeights) + .priorWeights(modelWeights); model.addSamples(params, samples); trend.addPoint(samples[1].first, samples[1].second[0], - maths::CConstantWeights::COUNT, weights[1][0]); + maths_t::countWeight(weights[1])); trend.addPoint(samples[2].first, samples[2].second[0], - maths::CConstantWeights::COUNT, weights[2][0]); + maths_t::countWeight(weights[2])); trend.addPoint(samples[0].first, samples[0].second[0], - maths::CConstantWeights::COUNT, weights[0][0]); - TDouble1Vec samples_{samples[2].second[0], samples[0].second[0], - samples[1].second[0]}; - TDouble4Vec1Vec weights_{weights[2][0], weights[0][0], weights[1][0]}; - prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_); + maths_t::countWeight(weights[0])); + prior.addSamples( + {samples[2].second[0], samples[0].second[0], samples[1].second[0]}, + {maths_t::countWeight(weights[2]), maths_t::countWeight(weights[0]), + maths_t::countWeight(weights[1])}); prior.propagateForwardsByTime(1.0); uint64_t checksum1{trend.checksum()}; @@ -596,32 +563,35 @@ void CTimeSeriesModelTest::testAddSamples() { core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5, 3.4, 3.3}, TAG), core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9, 3.8, 3.7}, TAG), core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1, 2.0, 1.9}, TAG)}; - TDouble2Vec4VecVec weights{{{1.0, 1.1, 1.2}}, {{1.5, 1.6, 1.7}}, {{0.9, 1.0, 1.1}}}; + double weights[][3]{{1.0, 1.1, 1.2}, {1.5, 1.6, 1.7}, {0.9, 1.0, 1.1}}; + TDouble2VecWeightsAryVec modelWeights{ + maths_t::countWeight(TDouble2Vec(weights[0], weights[0] + 3)), + maths_t::countWeight(TDouble2Vec(weights[1], weights[1] + 3)), + maths_t::countWeight(TDouble2Vec(weights[2], weights[2] + 3))}; maths::CModelAddSamplesParams 
params; params.integer(false) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + .trendWeights(modelWeights) + .priorWeights(modelWeights); model.addSamples(params, samples); for (std::size_t i = 0u; i < trends.size(); ++i) { trends[i]->addPoint(samples[1].first, samples[1].second[i], - maths::CConstantWeights::COUNT, - TDouble4Vec{weights[1][0][i]}); + maths_t::countWeight(weights[0][i])); trends[i]->addPoint(samples[2].first, samples[2].second[i], - maths::CConstantWeights::COUNT, - TDouble4Vec{weights[2][0][i]}); + maths_t::countWeight(weights[1][i])); trends[i]->addPoint(samples[0].first, samples[0].second[i], - maths::CConstantWeights::COUNT, - TDouble4Vec{weights[0][0][i]}); + maths_t::countWeight(weights[2][i])); } TDouble10Vec1Vec samples_{samples[2].second, samples[0].second, samples[1].second}; - TDouble10Vec4Vec1Vec weights_{{weights[2][0]}, {weights[0][0]}, {weights[1][0]}}; - prior.addSamples(maths::CConstantWeights::COUNT, samples_, weights_); + TDouble10VecWeightsAry1Vec weights_{ + maths_t::countWeight(TDouble10Vec(weights[2], weights[2] + 3)), + maths_t::countWeight(TDouble10Vec(weights[0], weights[0] + 3)), + maths_t::countWeight(TDouble10Vec(weights[1], weights[1] + 3))}; + prior.addSamples(samples_, weights_); prior.propagateForwardsByTime(1.0); for (std::size_t i = 0u; i < trends.size(); ++i) { @@ -638,16 +608,16 @@ void CTimeSeriesModelTest::testAddSamples() { LOG_DEBUG(<< "Propagation interval univariate"); { - maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight, - maths_t::E_SampleCountVarianceScaleWeight}; maths::CTimeSeriesDecompositionStub trend; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; maths::CUnivariateTimeSeriesModel model{params(bucketLength), 0, trend, prior}; double interval[]{1.0, 1.1, 0.4}; TDouble2Vec samples[]{{10.0}, {13.9}, {27.1}}; - TDouble2Vec4VecVec weights{{{0.9}, {1.5}, {1.1}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; + maths_t::setCount(TDouble2Vec{1.5}, weights[0]); + maths_t::setWinsorisationWeight(TDouble2Vec{0.9}, weights[0]); + maths_t::setCountVarianceScale(TDouble2Vec{1.1}, weights[0]); core_t::TTime time{0}; for (std::size_t i = 0u; i < 3; ++i) { @@ -655,13 +625,15 @@ void CTimeSeriesModelTest::testAddSamples() { maths::CModelAddSamplesParams params; params.integer(false) .propagationInterval(interval[i]) - .weightStyles(weightStyles) .trendWeights(weights) .priorWeights(weights); model.addSamples(params, sample); - TDouble4Vec weight{weights[0][0][0], weights[0][1][0], weights[0][2][0]}; - prior.addSamples(weightStyles, samples[i], {weight}); + TDoubleWeightsAry1Vec weight{maths_t::CUnitWeights::UNIT}; + for (std::size_t j = 0u; j < weights[0].size(); ++j) { + weight[0][j] = weights[0][j][0]; + } + prior.addSamples(TDouble1Vec(samples[i]), weight); prior.propagateForwardsByTime(interval[i]); uint64_t checksum1{prior.checksum()}; @@ -682,15 +654,12 @@ void CTimeSeriesModelTest::testAddSamples() { maths::CMultivariateNormalConjugate<3> prior{multivariateNormal()}; maths::CMultivariateTimeSeriesModel model{params(bucketLength), *trends[0], prior}; - maths_t::TWeightStyleVec weightStyles{maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountWeight, - maths_t::E_SampleCountVarianceScaleWeight}; double interval[]{1.0, 1.1, 0.4}; TDouble2Vec samples[]{{13.5, 13.4, 13.3}, {13.9, 13.8, 13.7}, {20.1, 20.0, 10.9}}; - TDouble2Vec4VecVec weights{ - {{0.1, 0.1, 0.2}, 
{1.0, 1.1, 1.2}, {2.0, 2.1, 2.2}}, - {{0.5, 0.6, 0.7}, {2.0, 2.1, 2.2}, {1.0, 1.1, 1.2}}, - {{0.9, 1.0, 1.0}, {0.9, 1.0, 1.0}, {1.9, 2.0, 2.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; + maths_t::setCount(TDouble2Vec{1.0, 1.1, 1.2}, weights[0]); + maths_t::setWinsorisationWeight(TDouble2Vec{0.1, 0.1, 0.2}, weights[0]); + maths_t::setCountVarianceScale(TDouble2Vec{2.0, 2.1, 2.2}, weights[0]); core_t::TTime time{0}; for (std::size_t i = 0u; i < 3; ++i) { @@ -698,15 +667,15 @@ void CTimeSeriesModelTest::testAddSamples() { maths::CModelAddSamplesParams params; params.integer(false) .propagationInterval(interval[i]) - .weightStyles(weightStyles) .trendWeights(weights) .priorWeights(weights); model.addSamples(params, sample); - TDouble10Vec4Vec weight{TDouble10Vec(weights[0][0]), - TDouble10Vec(weights[0][1]), - TDouble10Vec(weights[0][2])}; - prior.addSamples(weightStyles, {TDouble10Vec(samples[i])}, {weight}); + TDouble10VecWeightsAry1Vec weight{maths_t::CUnitWeights::unit(3)}; + for (std::size_t j = 0u; j < weights[0].size(); ++j) { + weight[0][j] = weights[0][j]; + } + prior.addSamples({TDouble10Vec(samples[i])}, weight); prior.propagateForwardsByTime(interval[i]); uint64_t checksum1{prior.checksum()}; @@ -720,7 +689,6 @@ void CTimeSeriesModelTest::testAddSamples() { LOG_DEBUG(<< "Decay rate control univariate"); { - double learnRate{params(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; auto controllers = decayRateControllers(1); @@ -730,8 +698,7 @@ void CTimeSeriesModelTest::testAddSamples() { TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 2000, samples); - TDouble4Vec1Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto noise : samples) { @@ -743,27 +710,22 @@ void CTimeSeriesModelTest::testAddSamples() { TTimeDouble2VecSizeTrVec sample_{ core::make_triple(time, TDouble2Vec{sample}, TAG)}; maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, sample_); if (trend.addPoint(time, sample)) { trend.decayRate(trend.decayRate() / controllers[0].multiplier()); prior.setToNonInformative(0.0, prior.decayRate()); for (const auto& value : model.slidingWindow()) { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - {{learnRate}}); + prior.addSamples({trend.detrend(value.first, value.second, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } prior.decayRate(prior.decayRate() / controllers[1].multiplier()); controllers[0].reset(); controllers[1].reset(); } double detrended{trend.detrend(time, sample, 0.0)}; - prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, weight); + prior.addSamples({detrended}, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); if (trend.initialized()) { @@ -810,8 +772,7 @@ void CTimeSeriesModelTest::testAddSamples() { rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); } - TDouble10Vec4Vec1Vec weight{{{1.0, 1.0, 1.0}}}; - TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for 
(auto& sample : samples) { @@ -838,7 +799,6 @@ void CTimeSeriesModelTest::testAddSamples() { maths::CModelAddSamplesParams params_; params_.integer(false) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); model.addSamples(params_, sample_); @@ -846,7 +806,8 @@ void CTimeSeriesModelTest::testAddSamples() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior, &controllers); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, weight); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); if (hasTrend) { @@ -895,7 +856,6 @@ void CTimeSeriesModelTest::testPredict() { LOG_DEBUG(<< "Univariate seasonal"); { - double learnRate{params(bucketLength).learnRate()}; maths::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength}; maths::CNormalMeanPrecConjugate prior{univariateNormal()}; auto controllers = decayRateControllers(1); @@ -904,31 +864,25 @@ void CTimeSeriesModelTest::testPredict() { TDoubleVec samples; rng.generateNormalSamples(0.0, 4.0, 1008, samples); - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / 86400.0); maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); if (trend.addPoint(time, sample)) { prior.setToNonInformative(0.0, DECAY_RATE); for (const auto& value : model.slidingWindow()) { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - {{learnRate}}); + prior.addSamples({trend.detrend(value.first, value.second, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(time, sample, 0.0)}, - maths::CConstantWeights::SINGLE_UNIT); + prior.addSamples({trend.detrend(time, sample, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); time += bucketLength; @@ -968,15 +922,11 @@ void CTimeSeriesModelTest::testPredict() { samples.insert(samples.end(), samples_.begin(), samples_.end()); rng.random_shuffle(samples.begin(), samples.end()); - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(1)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } @@ -1015,7 +965,7 @@ void CTimeSeriesModelTest::testPredict() { rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples); } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (auto& sample : samples) { for (auto& coordinate : sample) { @@ -1031,16 +981,12 @@ 
void CTimeSeriesModelTest::testPredict() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior); } - prior.addSamples(maths::CConstantWeights::COUNT, {detrended}, - maths::CConstantWeights::singleUnit(3)); + prior.addSamples({detrended}, + maths_t::CUnitWeights::singleUnit(3)); prior.propagateForwardsByTime(1.0); maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; @@ -1100,15 +1046,11 @@ void CTimeSeriesModelTest::testPredict() { rng.random_shuffle(samples.begin(), samples.end()); } - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (const auto& sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; } @@ -1162,24 +1104,26 @@ void CTimeSeriesModelTest::testProbability() { rng.generateNormalSamples(10.0, 4.0, 1000, samples); core_t::TTime time{0}; - const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(1)}; - for (auto sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weight) - .priorWeights(weight); + { + const TDouble2VecWeightsAryVec weight{ + maths_t::CUnitWeights::unit(1)}; + for (auto sample : samples) { + maths::CModelAddSamplesParams params; + params.integer(false) + .propagationInterval(1.0) + .trendWeights(weight) + .priorWeights(weight); - double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * - static_cast(time) / 86400.0)}; + double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * + static_cast(time) / 86400.0)}; - models[0].addSamples( - params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); - models[1].addSamples( - params, {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)}); + models[0].addSamples( + params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); + models[1].addSamples( + params, {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)}); - time += bucketLength; + time += bucketLength; + } } TTime2Vec1Vec time_{{time}}; @@ -1189,11 +1133,10 @@ void CTimeSeriesModelTest::testProbability() { maths_t::E_OneSidedAbove}; double confidences[]{0.0, 20.0, 50.0}; bool empties[]{true, false}; - maths_t::TWeightStyleVec weightStyles[]{ - {maths_t::E_SampleCountVarianceScaleWeight}, - {maths_t::E_SampleCountVarianceScaleWeight, - maths_t::E_SampleSeasonalVarianceScaleWeight}}; - TDouble2Vec4Vec weights[]{{{0.9}}, {{1.1}, {1.8}}}; + TDouble2VecWeightsAryVec weights(2, maths_t::CUnitWeights::unit(1)); + maths_t::setCountVarianceScale(TDouble2Vec{0.9}, weights[0]); + maths_t::setCountVarianceScale(TDouble2Vec{1.1}, weights[1]); + maths_t::setSeasonalVarianceScale(TDouble2Vec{1.8}, weights[1]); for (auto calculation : calculations) { LOG_DEBUG(<< "calculation = " << calculation); @@ -1201,24 +1144,24 
@@ void CTimeSeriesModelTest::testProbability() { LOG_DEBUG(<< " confidence = " << confidence); for (auto empty : empties) { LOG_DEBUG(<< " empty = " << empty); - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (const auto& weight : weights) { LOG_DEBUG(<< " weights = " - << core::CContainerPrinter::print(weights[i])); + << core::CContainerPrinter::print(weight)); double expectedProbability[2]; maths_t::ETail expectedTail[2]; { - TDouble4Vec weights_; - for (const auto& weight_ : weights[i]) { - weights_.push_back(weight_[0]); + maths_t::TDoubleWeightsAry weight_(maths_t::CUnitWeights::UNIT); + for (std::size_t i = 0u; i < weight.size(); ++i) { + weight_[i] = weight[i][0]; } double lb[2], ub[2]; models[0].prior().probabilityOfLessLikelySamples( - calculation, weightStyles[i], sample, - {weights_}, lb[0], ub[0], expectedTail[0]); + calculation, sample, {weight_}, lb[0], ub[0], + expectedTail[0]); models[1].prior().probabilityOfLessLikelySamples( - calculation, weightStyles[i], + calculation, {models[1].trend().detrend(time, sample[0], confidence)}, - {weights_}, lb[1], ub[1], expectedTail[1]); + {weight_}, lb[1], ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; } @@ -1230,8 +1173,7 @@ void CTimeSeriesModelTest::testProbability() { params.addCalculation(calculation) .seasonalConfidenceInterval(confidence) .addBucketEmpty({empty}) - .weightStyles(weightStyles[i]) - .addWeights(weights[i]); + .addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; models[0].probability(params, time_, {sample}, @@ -1271,27 +1213,28 @@ void CTimeSeriesModelTest::testProbability() { } core_t::TTime time{0}; - const TDouble2Vec4VecVec weight{maths::CConstantWeights::unit(3)}; - for (auto& sample : samples) { - maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weight) - .priorWeights(weight); + { + TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit(3)}; + for (auto& sample : samples) { + maths::CModelAddSamplesParams params; + params.integer(false) + .propagationInterval(1.0) + .trendWeights(weight) + .priorWeights(weight); - TDouble2Vec sample_(sample); - models[0].addSamples(params, {core::make_triple(time, sample_, TAG)}); + TDouble2Vec sample_(sample); + models[0].addSamples(params, {core::make_triple(time, sample_, TAG)}); - double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * - static_cast(time) / 86400.0)}; - for (auto& component : sample_) { - component += trend; - } + double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * + static_cast(time) / 86400.0)}; + for (auto& component : sample_) { + component += trend; + } - models[1].addSamples(params, {core::make_triple(time, sample_, TAG)}); + models[1].addSamples(params, {core::make_triple(time, sample_, TAG)}); - time += bucketLength; + time += bucketLength; + } } TTime2Vec1Vec time_{{time}}; @@ -1301,11 +1244,10 @@ void CTimeSeriesModelTest::testProbability() { maths_t::E_OneSidedAbove}; double confidences[]{0.0, 20.0, 50.0}; bool empties[]{true, false}; - maths_t::TWeightStyleVec weightStyles[]{ - {maths_t::E_SampleCountVarianceScaleWeight}, - {maths_t::E_SampleCountVarianceScaleWeight, - maths_t::E_SampleSeasonalVarianceScaleWeight}}; - TDouble2Vec4Vec weights[]{{{0.9, 0.9, 0.8}}, {{1.1, 1.0, 1.2}, {1.8, 1.7, 1.6}}}; + TDouble2VecWeightsAryVec weights(2, maths_t::CUnitWeights::unit(3)); + 
maths_t::setCountVarianceScale(TDouble2Vec{0.9, 0.9, 0.8}, weights[0]); + maths_t::setCountVarianceScale(TDouble2Vec{1.1, 1.0, 1.2}, weights[1]); + maths_t::setSeasonalVarianceScale(TDouble2Vec{1.8, 1.7, 1.6}, weights[1]); for (auto calculation : calculations) { LOG_DEBUG(<< "calculation = " << calculation); @@ -1313,28 +1255,29 @@ void CTimeSeriesModelTest::testProbability() { LOG_DEBUG(<< " confidence = " << confidence); for (auto empty : empties) { LOG_DEBUG(<< " empty = " << empty); - for (std::size_t i = 0u; i < boost::size(weightStyles); ++i) { + for (const auto& weight : weights) { LOG_DEBUG(<< " weights = " - << core::CContainerPrinter::print(weights[i])); + << core::CContainerPrinter::print(weight)); double expectedProbability[2]; TTail10Vec expectedTail[2]; { - TDouble10Vec4Vec weights_; - for (const auto& weight_ : weights[i]) { - weights_.push_back(weight_); + maths_t::TDouble10VecWeightsAry weight_( + maths_t::CUnitWeights::unit(3)); + for (std::size_t i = 0u; i < weight.size(); ++i) { + weight_[i] = weight[i]; } double lb[2], ub[2]; models[0].prior().probabilityOfLessLikelySamples( - calculation, weightStyles[i], {TDouble10Vec(sample)}, - {weights_}, lb[0], ub[0], expectedTail[0]); + calculation, {TDouble10Vec(sample)}, {weight_}, + lb[0], ub[0], expectedTail[0]); TDouble10Vec detrended; for (std::size_t j = 0u; j < sample.size(); ++j) { detrended.push_back(models[1].trend()[j]->detrend( time, sample[j], confidence)); } models[1].prior().probabilityOfLessLikelySamples( - calculation, weightStyles[i], {detrended}, - {weights_}, lb[1], ub[1], expectedTail[1]); + calculation, {detrended}, {weight_}, lb[1], + ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; } @@ -1346,8 +1289,7 @@ void CTimeSeriesModelTest::testProbability() { params.addCalculation(calculation) .seasonalConfidenceInterval(confidence) .addBucketEmpty({empty}) - .weightStyles(weightStyles[i]) - .addWeights(weights[i]); + .addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; models[0].probability(params, time_, {sample}, @@ -1384,8 +1326,7 @@ void CTimeSeriesModelTest::testProbability() { maths::CBasicStatistics::COrderStatisticsHeap smallest(10); - TDouble2Vec4Vec weight(maths::CConstantWeights::unit(1)); - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; std::size_t bucket{0}; core_t::TTime time{0}; for (auto& sample : samples) { @@ -1396,7 +1337,6 @@ void CTimeSeriesModelTest::testProbability() { maths::CModelAddSamplesParams params; params.integer(false) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); model.addSamples( @@ -1407,8 +1347,7 @@ void CTimeSeriesModelTest::testProbability() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(50.0) .addBucketEmpty({false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); TTail2Vec tail; double probability; bool conditional; @@ -1449,7 +1388,7 @@ void CTimeSeriesModelTest::testWeights() { TDoubleVec samples; rng.generateNormalSamples(0.0, 4.0, 1008, samples); - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * @@ -1457,24 +1396,18 @@ void CTimeSeriesModelTest::testWeights() { sample = scale * (1.0 + 0.1 * 
sample); maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); if (trend.addPoint(time, sample)) { prior.setToNonInformative(0.0, DECAY_RATE); for (const auto& value : model.slidingWindow()) { - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(value.first, value.second, 0.0)}, - maths::CConstantWeights::SINGLE_UNIT); + prior.addSamples({trend.detrend(value.first, value.second, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } - prior.addSamples(maths::CConstantWeights::COUNT, - {trend.detrend(time, sample, 0.0)}, - maths::CConstantWeights::SINGLE_UNIT); + prior.addSamples({trend.detrend(time, sample, 0.0)}, + maths_t::CUnitWeights::SINGLE_UNIT); time += bucketLength; } @@ -1529,8 +1462,7 @@ void CTimeSeriesModelTest::testWeights() { rng.generateMultivariateNormalSamples(mean, covariance, 1008, samples); } - TDouble10Vec4Vec1Vec weight{{{1.0, 1.0, 1.0}}}; - TDouble2Vec4VecVec weights{{{1.0, 1.0, 1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (auto& sample : samples) { double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * @@ -1546,14 +1478,11 @@ void CTimeSeriesModelTest::testWeights() { if (reinitialize) { reinitializePrior(learnRate, model, trends, prior); } - prior.addSamples(maths::CConstantWeights::COUNT, detrended, weight); + prior.addSamples(detrended, + maths_t::CUnitWeights::singleUnit(3)); maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples(params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; @@ -1615,16 +1544,11 @@ void CTimeSeriesModelTest::testMemoryUsage() { TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / 86400.0); trend.addPoint(time, sample); @@ -1655,15 +1579,11 @@ void CTimeSeriesModelTest::testMemoryUsage() { std::unique_ptr model{new maths::CMultivariateTimeSeriesModel{ params(bucketLength), trend, prior, &controllers}}; - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (auto& sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); for (auto& coordinate : sample) { coordinate 
+= 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / 86400.0); @@ -1707,16 +1627,11 @@ void CTimeSeriesModelTest::testPersist() { TDoubleVec samples; rng.generateNormalSamples(1.0, 4.0, 1000, samples); - TDouble2Vec4Vec weight{{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); origModel.addSamples( params, {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; @@ -1760,15 +1675,11 @@ void CTimeSeriesModelTest::testPersist() { maths::CMultivariateTimeSeriesModel origModel{params(bucketLength), trend, prior, &controllers}; - TDouble2Vec4VecVec weights{maths::CConstantWeights::unit(3)}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; for (const auto& sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); origModel.addSamples( params, {core::make_triple(time, TDouble2Vec(sample), TAG)}); time += bucketLength; @@ -1855,8 +1766,7 @@ void CTimeSeriesModelTest::testUpgrade() { core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty); std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval( - time, 90.0, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(1)))}; + time, 90.0, maths_t::CUnitWeights::unit(1)))}; core::CStringUtils::replace("[", "", interval_); core::CStringUtils::replace("]", "", interval_); core::CStringUtils::replace(" ", "", interval_); @@ -1906,8 +1816,7 @@ void CTimeSeriesModelTest::testUpgrade() { core::CStringUtils::tokenise(",", expectedIntervals[i], expectedInterval, empty); std::string interval_{core::CContainerPrinter::print(restoredModel.confidenceInterval( - time, 90.0, maths::CConstantWeights::COUNT, - maths::CConstantWeights::unit(3)))}; + time, 90.0, maths_t::CUnitWeights::unit(3)))}; core::CStringUtils::replace("[", "", interval_); core::CStringUtils::replace("]", "", interval_); core::CStringUtils::replace(" ", "", interval_); @@ -1951,21 +1860,17 @@ void CTimeSeriesModelTest::testAddSamplesWithCorrelations() { models[1].modelCorrelations(correlations); CTimeSeriesCorrelateModelAllocator allocator; - TDouble2Vec4VecVec weights{{{1.0}}}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; core_t::TTime time{0}; for (auto sample : samples) { correlations.refresh(allocator); maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); models[0].addSamples( params, {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)}); models[1].addSamples( params, {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)}); - correlations.processSamples(maths::CConstantWeights::COUNT); + correlations.processSamples(); time += bucketLength; } } @@ -2010,8 
+1915,7 @@ void CTimeSeriesModelTest::testAnomalyModel() { //TDoubleVec scores; maths::CBasicStatistics::COrderStatisticsHeap mostAnomalous(10); - TDouble2Vec4Vec weight(maths::CConstantWeights::unit(1)); - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; std::size_t bucket{0}; core_t::TTime time{0}; for (auto& sample : samples) { @@ -2025,7 +1929,6 @@ void CTimeSeriesModelTest::testAnomalyModel() { maths::CModelAddSamplesParams params; params.integer(false) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); model.addSamples( @@ -2036,8 +1939,7 @@ void CTimeSeriesModelTest::testAnomalyModel() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(50.0) .addBucketEmpty({false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); TTail2Vec tail; double probability; bool conditional; @@ -2098,8 +2000,7 @@ void CTimeSeriesModelTest::testAnomalyModel() { //TDoubleVec scores; maths::CBasicStatistics::COrderStatisticsHeap mostAnomalous(10); - TDouble2Vec4Vec weight(maths::CConstantWeights::unit(3)); - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(3)}; core_t::TTime time{0}; std::size_t bucket{0}; for (auto& sample : samples) { @@ -2116,7 +2017,6 @@ void CTimeSeriesModelTest::testAnomalyModel() { maths::CModelAddSamplesParams params; params.integer(false) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); model.addSamples( @@ -2127,8 +2027,7 @@ void CTimeSeriesModelTest::testAnomalyModel() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(50.0) .addBucketEmpty({false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); TTail2Vec tail; double probability; bool conditional; diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc index e259df5aee..0ed3098ed9 100644 --- a/lib/maths/unittest/CToolsTest.cc +++ b/lib/maths/unittest/CToolsTest.cc @@ -1,3 +1,4 @@ + /* * ELASTICSEARCH CONFIDENTIAL * @@ -1095,8 +1096,7 @@ void CToolsTest::testSpread() { double raw[] = {150.0, 170.0, 4500.0, 4650.0, 4700.0, 4800.0, 73000.0, 73150.0, 73500.0, 73600.0, 73800.0, 74000.0}; double separation = 126.0; - std::string expected = "[97, 223, 4473.5, 4599.5, 4725.5, 4851.5, " - "73000, 73150, 73487, 73613, 73800, 74000]"; + std::string expected = "[97, 223, 4473.5, 4599.5, 4725.5, 4851.5, 73000, 73150, 73487, 73613, 73800, 74000]"; TDoubleVec points(boost::begin(raw), boost::end(raw)); CTools::spread(0.0, period, separation, points); LOG_DEBUG(<< "spread = " << core::CContainerPrinter::print(points)); diff --git a/lib/maths/unittest/TestUtils.cc b/lib/maths/unittest/TestUtils.cc index aabf2c914d..acf6a3e4f2 100644 --- a/lib/maths/unittest/TestUtils.cc +++ b/lib/maths/unittest/TestUtils.cc @@ -46,8 +46,8 @@ class CCdf : public std::unary_function { double lowerBound, upperBound; m_X[0] = x; - if (!m_Prior->minusLogJointCdf(CConstantWeights::COUNT_VARIANCE, m_X, - CConstantWeights::SINGLE_UNIT, lowerBound, upperBound)) { + if (!m_Prior->minusLogJointCdf(m_X, maths_t::CUnitWeights::SINGLE_UNIT, + lowerBound, upperBound)) { // We have no choice but to throw because this is // invoked inside a boost root finding function. 
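The TestUtils.cc hunks on either side of this point migrate prior calls from the old (weight-styles, samples, weights) shape to the two-argument (samples, weights) form, where every sample carries a fixed-size maths_t::TDoubleWeightsAry. The following minimal sketch, which is not part of the patch, shows that call shape in isolation; the helper names, the example namespace, the header paths, and the reconstructed template parameters are assumptions for illustration only.

#include <core/CSmallVector.h>
#include <maths/CPrior.h>
#include <maths/MathsTypes.h>

namespace example {

using TDouble1Vec = ml::core::CSmallVector<double, 1>;

// Add samples with unit weights: one maths_t::TDoubleWeightsAry per sample and
// no separate weight-styles vector, mirroring CPriorTestInterface::addSamples
// in the hunk that follows.
void addUnitWeightedSamples(ml::maths::CPrior& prior, const TDouble1Vec& samples) {
    ml::maths_t::TDoubleWeightsAry1Vec weights(samples.size(),
                                               ml::maths_t::CUnitWeights::UNIT);
    prior.addSamples(samples, weights);
}

// Add one sample with a non-unit count weight using the maths_t::countWeight
// factory the patch adopts elsewhere (for example in CEventRatePopulationModel).
void addCountWeightedSample(ml::maths::CPrior& prior, double sample, double count) {
    ml::maths_t::TDoubleWeightsAry1Vec weights{ml::maths_t::countWeight(count)};
    prior.addSamples({sample}, weights);
}

} // namespace example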
@@ -101,71 +101,51 @@ CPriorTestInterface::CPriorTestInterface(CPrior& prior) : m_Prior(&prior) { } void CPriorTestInterface::addSamples(const TDouble1Vec& samples) { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - m_Prior->addSamples(TWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + m_Prior->addSamples(samples, weights); } maths_t::EFloatingPointErrorStatus CPriorTestInterface::jointLogMarginalLikelihood(const TDouble1Vec& samples, double& result) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->jointLogMarginalLikelihood(TWeights::COUNT, samples, weights, result); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + return m_Prior->jointLogMarginalLikelihood(samples, weights, result); } bool CPriorTestInterface::minusLogJointCdf(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdf(TWeights::COUNT, samples, weights, lowerBound, upperBound); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + return m_Prior->minusLogJointCdf(samples, weights, lowerBound, upperBound); } bool CPriorTestInterface::minusLogJointCdfComplement(const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - return m_Prior->minusLogJointCdfComplement(TWeights::COUNT, samples, - weights, lowerBound, upperBound); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); + return m_Prior->minusLogJointCdfComplement(samples, weights, lowerBound, upperBound); } bool CPriorTestInterface::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, const TDouble1Vec& samples, double& lowerBound, double& upperBound) const { - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); + maths_t::TDoubleWeightsAry1Vec weights(samples.size(), TWeights::UNIT); maths_t::ETail tail; - return m_Prior->probabilityOfLessLikelySamples( - calculation, TWeights::COUNT, samples, weights, lowerBound, upperBound, tail); + return m_Prior->probabilityOfLessLikelySamples(calculation, samples, weights, + lowerBound, upperBound, tail); } bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, const TDouble1Vec& samples, double& result) const { - TDoubleDoublePr1Vec weightedSamples; - weightedSamples.reserve(samples.size()); - for (std::size_t i = 0u; i < samples.size(); ++i) { - weightedSamples.push_back(std::make_pair(samples[i], 1.0)); - } - return this->anomalyScore(calculation, maths_t::E_SampleCountWeight, - weightedSamples, result); -} -bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculation, - maths_t::ESampleWeightStyle weightStyle, - const TDoubleDoublePr1Vec& samples, - double& result) const { result = 0.0; - TWeightStyleVec weightStyles(1, weightStyle); - TDouble1Vec samples_(samples.size()); - TDouble4Vec1Vec weights(samples.size(), TWeights::UNIT); - for (std::size_t i = 0u; i < samples.size(); ++i) { - samples_[i] = samples[i].first; - weights[i][0] = samples[i].second; - } - double lowerBound, upperBound; maths_t::ETail tail; - if (!m_Prior->probabilityOfLessLikelySamples(calculation, weightStyles, samples_, weights, + if (!m_Prior->probabilityOfLessLikelySamples(calculation, samples, + maths_t::CUnitWeights::SINGLE_UNIT, lowerBound, upperBound, tail)) { LOG_ERROR(<< "Failed computing probability of 
less likely samples"); return false; @@ -179,6 +159,7 @@ bool CPriorTestInterface::anomalyScore(maths_t::EProbabilityCalculation calculat bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, double eps, double& result) const { + result = 0.0; percentage /= 100.0; @@ -215,6 +196,7 @@ bool CPriorTestInterface::marginalLikelihoodQuantileForTest(double percentage, } bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { + using TMarginalLikelihood = CCompositeFunctions::CExp; using TFunctionTimesMarginalLikelihood = @@ -258,6 +240,7 @@ bool CPriorTestInterface::marginalLikelihoodMeanForTest(double& result) const { } bool CPriorTestInterface::marginalLikelihoodVarianceForTest(double& result) const { + using TMarginalLikelihood = CCompositeFunctions::CExp; using TResidualTimesMarginalLikelihood = diff --git a/lib/maths/unittest/TestUtils.h b/lib/maths/unittest/TestUtils.h index 818925ae51..eee15ad041 100644 --- a/lib/maths/unittest/TestUtils.h +++ b/lib/maths/unittest/TestUtils.h @@ -30,13 +30,9 @@ namespace ml { namespace handy_typedefs { using TDouble1Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; using TDouble10Vec1Vec = core::CSmallVector; -using TDouble10Vec4Vec = core::CSmallVector; using TDouble10Vec10Vec = core::CSmallVector; -using TDouble10Vec4Vec1Vec = core::CSmallVector; using TVector2 = maths::CVectorNx1; using TVector2Vec = std::vector; using TVector2VecVec = std::vector; @@ -56,8 +52,7 @@ class CPriorTestInterface { public: using TDoubleDoublePr = std::pair; using TDoubleDoublePr1Vec = core::CSmallVector; - using TWeightStyleVec = maths_t::TWeightStyleVec; - using TWeights = maths::CConstantWeights; + using TWeights = maths_t::CUnitWeights; public: explicit CPriorTestInterface(maths::CPrior& prior); @@ -218,14 +213,13 @@ class CUnitKernel { bool operator()(const maths::CVectorNx1& x, double& result) const { m_X[0].assign(x.begin(), x.end()); - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, - SINGLE_UNIT, result); + m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, result); result = std::exp(result); return true; } private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::CMultivariatePrior* m_Prior; @@ -233,9 +227,8 @@ class CUnitKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CUnitKernel::SINGLE_UNIT( - 1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +ml::maths_t::TDouble10VecWeightsAry1Vec CUnitKernel::SINGLE_UNIT{ + ml::maths_t::CUnitWeights::unit(N)}; //! \brief The kernel for computing the mean of a multivariate prior. 
template @@ -248,15 +241,14 @@ class CMeanKernel { maths::CVectorNx1& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, - SINGLE_UNIT, likelihood); + m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = x * likelihood; return true; } private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::CMultivariatePrior* m_Prior; @@ -264,9 +256,8 @@ class CMeanKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CMeanKernel::SINGLE_UNIT( - 1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +ml::maths_t::TDouble10VecWeightsAry1Vec CMeanKernel::SINGLE_UNIT{ + ml::maths_t::CUnitWeights::unit(N)}; //! \brief The kernel for computing the variance of a multivariate prior. template @@ -280,15 +271,14 @@ class CCovarianceKernel { maths::CSymmetricMatrixNxN& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; - m_Prior->jointLogMarginalLikelihood(maths::CConstantWeights::COUNT, m_X, - SINGLE_UNIT, likelihood); + m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = (x - m_Mean).outer() * likelihood; return true; } private: - static handy_typedefs::TDouble10Vec4Vec1Vec SINGLE_UNIT; + static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::CMultivariatePrior* m_Prior; @@ -297,9 +287,8 @@ class CCovarianceKernel { }; template -handy_typedefs::TDouble10Vec4Vec1Vec CCovarianceKernel::SINGLE_UNIT( - 1, - handy_typedefs::TDouble10Vec4Vec(1, handy_typedefs::TDouble10Vec(N, 1.0))); +ml::maths_t::TDouble10VecWeightsAry1Vec CCovarianceKernel::SINGLE_UNIT{ + ml::maths_t::CUnitWeights::unit(N)}; //! A constant function. double constant(core_t::TTime time); diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index 8b6002956f..542bb9a3da 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -59,18 +59,11 @@ namespace { using TDouble2Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TBool2Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; // We use short field names to reduce the state size const std::string INDIVIDUAL_STATE_TAG("a"); const std::string PROBABILITY_PRIOR_TAG("b"); - -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; -const maths_t::TWeightStyleVec - PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight); } CEventRateModel::CEventRateModel(const SModelParams& params, @@ -256,7 +249,7 @@ void CEventRateModel::sample(core_t::TTime startTime, // Declared outside the loop to minimize the number of times they are created. 
maths::CModel::TTimeDouble2VecSizeTrVec values; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights(1); + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights; for (auto& featureData : m_CurrentBucketStats.s_FeatureData) { model_t::EFeature feature = featureData.first; @@ -269,10 +262,8 @@ void CEventRateModel::sample(core_t::TTime startTime, for (const auto& data_ : data) { if (data_.second.s_Count > 0) { LOG_TRACE(<< "person = " << this->personName(data_.first)); - m_ProbabilityPrior.addSamples( - maths::CConstantWeights::COUNT, - TDouble1Vec{static_cast(data_.first)}, - maths::CConstantWeights::SINGLE_UNIT); + m_ProbabilityPrior.addSamples({static_cast(data_.first)}, + maths_t::CUnitWeights::SINGLE_UNIT); } } if (!data.empty()) { @@ -313,6 +304,7 @@ void CEventRateModel::sample(core_t::TTime startTime, double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight; + double ceff = emptyBucketWeight * this->learnRate(feature); LOG_TRACE(<< "Bucket = " << this->printCurrentBucket() << ", feature = " << model_t::print(feature) << ", count = " @@ -323,17 +315,17 @@ void CEventRateModel::sample(core_t::TTime startTime, model->params().probabilityBucketEmpty( this->probabilityBucketEmpty(feature, pid)); - TDouble2Vec value(1, count); + TDouble2Vec value{count}; values.assign(1, core::make_triple(sampleTime, value, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); - weights[0].resize(2, TDouble2Vec(1)); - weights[0][0].assign(dimension, emptyBucketWeight * this->learnRate(feature)); - weights[0][1] = model->winsorisationWeight(derate, sampleTime, value); + weights.resize(1, maths_t::CUnitWeights::unit(dimension)); + maths_t::setCount(TDouble2Vec(dimension, ceff), weights[0]); + maths_t::setWinsorisationWeight( + model->winsorisationWeight(derate, sampleTime, value), weights[0]); maths::CModelAddSamplesParams params; params.integer(true) .nonNegative(true) .propagationInterval(interval) - .weightStyles(SAMPLE_WEIGHT_STYLES) .trendWeights(weights) .priorWeights(weights); @@ -343,7 +335,7 @@ void CEventRateModel::sample(core_t::TTime startTime, } } - this->sampleCorrelateModels(SAMPLE_WEIGHT_STYLES); + this->sampleCorrelateModels(); m_Probabilities = TCategoryProbabilityCache(m_ProbabilityPrior); } } @@ -599,8 +591,8 @@ void CEventRateModel::fill(model_t::EFeature feature, core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())}; TOptionalUInt64 count{this->currentBucketCount(pid, bucketTime)}; double value{model_t::offsetCountToZero(feature, static_cast(data->s_Count))}; - TDouble2Vec4Vec weight{ - model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)}; + maths_t::TDouble2VecWeightsAry weight(maths_t::seasonalVarianceScaleWeight( + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time))); params.s_Feature = feature; params.s_Model = model; @@ -608,7 +600,7 @@ void CEventRateModel::fill(model_t::EFeature feature, params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, TDouble2Vec{value}); if (interim && model_t::requiresInterimResultAdjustment(feature)) { - double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]}; + double mode{params.s_Model->mode(time, weight)[0]}; TDouble2Vec correction{this->interimValueCorrector().corrections( time, this->currentBucketTotalCount(), mode, value)}; params.s_Value[0] += correction; @@ -617,9 +609,8 @@ void CEventRateModel::fill(model_t::EFeature feature, } params.s_Count = 1.0; params.s_ComputeProbabilityParams 
- .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, !count || *count == 0)) + .addCalculation(model_t::probabilityCalculation(feature)) // new line + .addBucketEmpty({!count || *count == 0}) .addWeights(weight); } @@ -646,9 +637,7 @@ void CEventRateModel::fill(model_t::EFeature feature, params.s_Variables.resize(correlates.size()); params.s_CorrelatedLabels.resize(correlates.size()); params.s_Correlated.resize(correlates.size()); - params.s_ComputeProbabilityParams - .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES); + params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)); // These are indexed as follows: // influenceValues["influencer name"]["correlate"]["influence value"] @@ -670,18 +659,17 @@ void CEventRateModel::fill(model_t::EFeature feature, params.s_Variables[i] = variables; const maths::CModel* models[]{ model, this->model(feature, correlates[i][variables[1]])}; - TDouble2Vec4Vec weight(1, TDouble2Vec(2)); - weight[0][variables[0]] = models[0]->seasonalWeight( + maths_t::TDouble2Vec scale(2); + scale[variables[0]] = models[0]->seasonalWeight( maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0]; - weight[0][variables[1]] = models[1]->seasonalWeight( + scale[variables[1]] = models[1]->seasonalWeight( maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)[0]; TOptionalUInt64 count[2]; count[0] = this->currentBucketCount(correlates[i][0], bucketTime); count[1] = this->currentBucketCount(correlates[i][1], bucketTime); params.s_ComputeProbabilityParams - .addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, - !count[1] || *count[1] == 0}) - .addWeights(weight); + .addBucketEmpty({!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) // new line + .addWeights(maths_t::seasonalVarianceScaleWeight(scale)); const TFeatureData* data[2]; data[0] = this->featureData(feature, correlates[i][0], bucketTime); @@ -719,7 +707,7 @@ void CEventRateModel::fill(model_t::EFeature feature, } if (interim && model_t::requiresInterimResultAdjustment(feature)) { TDouble2Vec1Vec modes = params.s_Model->correlateModes( - time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights()); + time, params.s_ComputeProbabilityParams.weights()); for (std::size_t i = 0u; i < modes.size(); ++i) { TDouble2Vec& value_ = params.s_Values[i]; if (!value_.empty()) { diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc index df4d379d84..f5bcee8001 100644 --- a/lib/model/CEventRatePopulationModel.cc +++ b/lib/model/CEventRatePopulationModel.cc @@ -51,9 +51,6 @@ namespace { using TDouble2Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble2Vec4VecVec = std::vector; -using TBool2Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; using TSizeSizePrFeatureDataPrVec = CEventRatePopulationModel::TSizeSizePrFeatureDataPrVec; using TFeatureSizeSizePrFeatureDataPrVecPr = @@ -65,7 +62,7 @@ using TSizeFuzzyDeduplicateUMap = //! \brief The values and weights for an attribute. 
struct SValuesAndWeights { maths::CModel::TTimeDouble2VecSizeTrVec s_Values; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_Weights; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_Weights; }; using TSizeValuesAndWeightsUMap = boost::unordered_map; @@ -77,11 +74,6 @@ const std::string FEATURE_MODELS_TAG("d"); const std::string FEATURE_CORRELATE_MODELS_TAG("e"); const std::string MEMORY_ESTIMATOR_TAG("f"); const std::string EMPTY_STRING(""); - -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; -const maths_t::TWeightStyleVec - PROBABILITY_WEIGHT_STYLES(1, maths_t::E_SampleSeasonalVarianceScaleWeight); } CEventRatePopulationModel::CEventRatePopulationModel( @@ -383,6 +375,7 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; + std::size_t dimension = model_t::dimension(feature); TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; data.swap(featureData_.second); LOG_TRACE(<< model_t::print(feature) << ": " @@ -462,13 +455,18 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, : attribute.s_Values.size(); if (duplicate < attribute.s_Values.size()) { - attribute.s_Weights[duplicate][0][0] += - this->sampleRateWeight(pid, cid) * this->learnRate(feature); + double weight{this->sampleRateWeight(pid, cid) * this->learnRate(feature)}; + maths_t::addCount(TDouble2Vec{weight}, attribute.s_Weights[duplicate]); } else { attribute.s_Values.emplace_back(sampleTime, TDouble2Vec{value}, pid); - attribute.s_Weights.emplace_back(TDouble2Vec4Vec{ - {this->sampleRateWeight(pid, cid) * this->learnRate(feature)}, - model->winsorisationWeight(1.0, sampleTime, {value})}); + attribute.s_Weights.push_back( + maths_t::CUnitWeights::unit(1)); + auto& weight = attribute.s_Weights.back(); + maths_t::setCount(TDouble2Vec{this->sampleRateWeight(pid, cid) * + this->learnRate(feature)}, + weight); + maths_t::setWinsorisationWeight( + model->winsorisationWeight(1.0, sampleTime, {value}), weight); } } @@ -478,7 +476,6 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, params.integer(true) .nonNegative(true) .propagationInterval(this->propagationTime(cid, sampleTime)) - .weightStyles(SAMPLE_WEIGHT_STYLES) .trendWeights(attribute.second.s_Weights) .priorWeights(attribute.second.s_Weights); maths::CModel* model{this->model(feature, cid)}; @@ -490,7 +487,7 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, } for (const auto& feature : m_FeatureCorrelatesModels) { - feature.s_Models->processSamples(SAMPLE_WEIGHT_STYLES); + feature.s_Models->processSamples(); } m_AttributeProbabilities = TCategoryProbabilityCache(m_AttributeProbabilityPrior); @@ -603,13 +600,11 @@ bool CEventRatePopulationModel::computeProbability(std::size_t pid, const TSizeSizePrFeatureDataPrVec& data = this->featureData(feature, startTime); TSizeSizePr range = personRange(data, pid); for (std::size_t j = range.first; j < range.second; ++j) { - TDouble1Vec category( - 1, static_cast(CDataGatherer::extractAttributeId(data[j]))); - TDouble4Vec1Vec weights( - 1, TDouble4Vec(1, static_cast( - CDataGatherer::extractData(data[j]).s_Count))); - personAttributeProbabilityPrior.addSamples( - maths::CConstantWeights::COUNT, category, weights); + TDouble1Vec category{ + static_cast(CDataGatherer::extractAttributeId(data[j]))}; + maths_t::TDoubleWeightsAry1Vec weights{maths_t::countWeight( + 
static_cast(CDataGatherer::extractData(data[j]).s_Count))}; + personAttributeProbabilityPrior.addSamples(category, weights); } continue; } @@ -1020,8 +1015,8 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature, auto data = find(this->featureData(feature, bucketTime), pid, cid); const maths::CModel* model{this->model(feature, cid)}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength())}; - TDouble2Vec4Vec weight{ - model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time)}; + maths_t::TDouble2VecWeightsAry weight(maths_t::seasonalVarianceScaleWeight( + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time))); double value{model_t::offsetCountToZero( feature, static_cast(CDataGatherer::extractData(*data).s_Count))}; @@ -1031,7 +1026,7 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature, params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, TDouble2Vec{value}); if (interim && model_t::requiresInterimResultAdjustment(feature)) { - double mode{params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weight)[0]}; + double mode{params.s_Model->mode(time, weight)[0]}; TDouble2Vec correction{this->interimValueCorrector().corrections( time, this->currentBucketTotalCount(), mode, value)}; params.s_Value[0] += correction; @@ -1039,10 +1034,10 @@ void CEventRatePopulationModel::fill(model_t::EFeature feature, CCorrectionKey(feature, pid, cid), correction); } params.s_Count = 1.0; - params.s_ComputeProbabilityParams.tag(pid) + params.s_ComputeProbabilityParams + .tag(pid) // new line .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, false)) + .addBucketEmpty({false}) .addWeights(weight); } diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc index 3a68d1c767..59d967e5a7 100644 --- a/lib/model/CHierarchicalResultsNormalizer.cc +++ b/lib/model/CHierarchicalResultsNormalizer.cc @@ -277,15 +277,13 @@ CHierarchicalResultsNormalizer::fromJsonStream(std::istream& inputStream) { if (normalizerVec != nullptr) { if (!traverser.next()) { - LOG_ERROR(<< "Cannot restore hierarchical normalizer - end " - "of object reached when " + LOG_ERROR(<< "Cannot restore hierarchical normalizer - end of object reached when " << CAnomalyScore::MLKEY_ATTRIBUTE << " was expected"); return E_Corrupt; } if (!traverser.next()) { - LOG_ERROR(<< "Cannot restore hierarchical normalizer - end " - "of object reached when " + LOG_ERROR(<< "Cannot restore hierarchical normalizer - end of object reached when " << CAnomalyScore::MLQUANTILESDESCRIPTION_ATTRIBUTE << " was expected"); return E_Corrupt; diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 63a7a19712..1f81a51435 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -563,9 +563,9 @@ maths::CModel* CIndividualModel::model(model_t::EFeature feature, std::size_t pi : nullptr; } -void CIndividualModel::sampleCorrelateModels(const maths_t::TWeightStyleVec& weightStyles) { +void CIndividualModel::sampleCorrelateModels() { for (const auto& feature : m_FeatureCorrelatesModels) { - feature.s_Models->processSamples(weightStyles); + feature.s_Models->processSamples(); } } diff --git a/lib/model/CMetricBucketGatherer.cc b/lib/model/CMetricBucketGatherer.cc index e2249013f8..72b8251b80 100644 --- a/lib/model/CMetricBucketGatherer.cc +++ b/lib/model/CMetricBucketGatherer.cc @@ -449,8 +449,7 @@ class 
CRestoreFeatureData { result[lastCid] = TSizeTUMap(1); } else if (name == PERSON_TAG) { if (!seenCid) { - LOG_ERROR(<< "Incorrect format - person before " - "attribute ID in " + LOG_ERROR(<< "Incorrect format - person before attribute ID in " << traverser.value()); return false; } @@ -1168,8 +1167,7 @@ bool CMetricBucketGatherer::processFields(const TStrCPtrVec& fieldValues, allOk = false; } if (m_FieldNames.size() > m_FieldMetricCategories.size() + i) { - LOG_ERROR(<< "Inconsistency - more statistic field names than " - "metric categories " + LOG_ERROR(<< "Inconsistency - more statistic field names than metric categories " << m_FieldNames.size() - i << " > " << m_FieldMetricCategories.size()); allOk = false; diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index 69836dd27e..35caa30def 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -59,20 +59,10 @@ namespace { using TTime1Vec = core::CSmallVector; using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; -using TBool2Vec = core::CSmallVector; // We use short field names to reduce the state size const std::string INDIVIDUAL_STATE_TAG("a"); - -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{ - maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; } CMetricModel::CMetricModel(const SModelParams& params, @@ -233,8 +223,8 @@ void CMetricModel::sample(core_t::TTime startTime, // Declared outside the loop to minimize the number of times they are created. maths::CModel::TTimeDouble2VecSizeTrVec values; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec trendWeights; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec priorWeights; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec trendWeights; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec priorWeights; for (auto& featureData : m_CurrentBucketStats.s_FeatureData) { model_t::EFeature feature = featureData.first; @@ -274,15 +264,16 @@ void CMetricModel::sample(core_t::TTime startTime, continue; } + std::size_t n = samples.size(); double derate = this->derate(pid, sampleTime); double interval = (1.0 + (this->params().s_InitialDecayRateMultiplier - 1.0) * derate) * emptyBucketWeight; - double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && - samples.size() > 0 + double count = this->params().s_MaximumUpdatesPerBucket > 0.0 && n > 0 ? 
this->params().s_MaximumUpdatesPerBucket / - static_cast(samples.size()) + static_cast(n) : 1.0; + double ceff = emptyBucketWeight * count * this->learnRate(feature); LOG_TRACE(<< "Bucket = " << gatherer.printCurrentBucket(time) << ", feature = " << model_t::print(feature) @@ -295,31 +286,32 @@ void CMetricModel::sample(core_t::TTime startTime, model->params().probabilityBucketEmpty( this->probabilityBucketEmpty(feature, pid)); - values.resize(samples.size()); - trendWeights.resize(samples.size(), TDouble2Vec4Vec(3)); - priorWeights.resize(samples.size(), TDouble2Vec4Vec(3)); - for (std::size_t i = 0u; i < samples.size(); ++i) { + values.resize(n); + trendWeights.resize(n, maths_t::CUnitWeights::unit(dimension)); + priorWeights.resize(n, maths_t::CUnitWeights::unit(dimension)); + for (std::size_t i = 0u; i < n; ++i) { core_t::TTime ti = samples[i].time(); TDouble2Vec vi(samples[i].value(dimension)); double vs = samples[i].varianceScale(); values[i] = core::make_triple( model_t::sampleTime(feature, time, bucketLength, ti), vi, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID); - trendWeights[i][0].assign(dimension, emptyBucketWeight * count * - this->learnRate(feature) / vs); - trendWeights[i][1] = model->winsorisationWeight(derate, ti, vi); - trendWeights[i][2].assign(dimension, vs); - priorWeights[i][0].assign(dimension, emptyBucketWeight * count * - this->learnRate(feature)); - priorWeights[i][1] = trendWeights[i][1]; - priorWeights[i][2].assign(dimension, vs); + maths_t::setCount(TDouble2Vec(dimension, ceff / vs), trendWeights[i]); + maths_t::setWinsorisationWeight( + model->winsorisationWeight(derate, ti, vi), trendWeights[i]); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs), + trendWeights[i]); + maths_t::setCount(TDouble2Vec(dimension, ceff), priorWeights[i]); + maths_t::setWinsorisationWeight( + maths_t::winsorisationWeight(trendWeights[i]), priorWeights[i]); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs), + priorWeights[i]); } maths::CModelAddSamplesParams params; params.integer(data_.second.s_IsInteger) .nonNegative(data_.second.s_IsNonNegative) .propagationInterval(interval) - .weightStyles(SAMPLE_WEIGHT_STYLES) .trendWeights(trendWeights) .priorWeights(priorWeights); @@ -329,7 +321,7 @@ void CMetricModel::sample(core_t::TTime startTime, } } - this->sampleCorrelateModels(SAMPLE_WEIGHT_STYLES); + this->sampleCorrelateModels(); } } @@ -548,15 +540,17 @@ void CMetricModel::fill(model_t::EFeature feature, core_t::TTime bucketTime, bool interim, CProbabilityAndInfluenceCalculator::SParams& params) const { + std::size_t dimension{model_t::dimension(feature)}; const TFeatureData* data{this->featureData(feature, pid, bucketTime)}; const TOptionalSample& bucket{data->s_BucketValue}; const maths::CModel* model{this->model(feature, pid)}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(), bucket->time())}; - TDouble2Vec4Vec weights(2); - weights[0] = model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time); - weights[1].assign(dimension, bucket->varianceScale()); + maths_t::TDouble2VecWeightsAry weights(maths_t::CUnitWeights::unit(dimension)); + maths_t::setSeasonalVarianceScale( + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), weights); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, bucket->varianceScale()), weights); TOptionalUInt64 count{this->currentBucketCount(pid, bucketTime)}; params.s_Feature = feature; @@ -565,7 +559,7 @@ void CMetricModel::fill(model_t::EFeature feature, 
params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, bucket->value()); if (interim && model_t::requiresInterimResultAdjustment(feature)) { - TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights)); + TDouble2Vec mode(params.s_Model->mode(time, weights)); TDouble2Vec correction(this->interimValueCorrector().corrections( time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); params.s_Value[0] += correction; @@ -574,9 +568,8 @@ void CMetricModel::fill(model_t::EFeature feature, } params.s_Count = bucket->count(); params.s_ComputeProbabilityParams - .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, !count || *count == 0)) + .addCalculation(model_t::probabilityCalculation(feature)) // new line + .addBucketEmpty({!count || *count == 0}) .addWeights(weights); } @@ -586,6 +579,7 @@ void CMetricModel::fill(model_t::EFeature feature, bool interim, CProbabilityAndInfluenceCalculator::SCorrelateParams& params, TStrCRefDouble1VecDouble1VecPrPrVecVecVec& influenceValues) const { + using TStrCRefDouble1VecDoublePrPr = std::pair; const CDataGatherer& gatherer{this->dataGatherer()}; @@ -603,9 +597,7 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_Variables.resize(correlates.size()); params.s_CorrelatedLabels.resize(correlates.size()); params.s_Correlated.resize(correlates.size()); - params.s_ComputeProbabilityParams - .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES); + params.s_ComputeProbabilityParams.addCalculation(model_t::probabilityCalculation(feature)); // These are indexed as follows: // influenceValues["influencer name"]["correlate"]["influence value"] @@ -626,11 +618,13 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_Variables[i] = variables; const maths::CModel* models[]{ model, this->model(feature, correlates[i][variables[1]])}; - TDouble2Vec4Vec weight(2, TDouble2Vec(2, 1.0)); - weight[0][variables[0]] = models[0]->seasonalWeight( + maths_t::TDouble2VecWeightsAry weight(maths_t::CUnitWeights::unit(2)); + TDouble2Vec scale(2); + scale[variables[0]] = models[0]->seasonalWeight( maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; - weight[0][variables[1]] = models[1]->seasonalWeight( + scale[variables[1]] = models[1]->seasonalWeight( maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, bucketTime)[0]; + maths_t::setSeasonalVarianceScale(scale, weight); const TFeatureData* data[2]; data[0] = this->featureData(feature, correlates[i][0], bucketTime); @@ -651,8 +645,9 @@ void CMetricModel::fill(model_t::EFeature feature, params.s_Values[i][2 * j + 0] = bucket0->value()[j]; params.s_Values[i][2 * j + 1] = bucket1->value()[j]; } - weight[1][variables[0]] = bucket0->varianceScale(); - weight[1][variables[1]] = bucket1->varianceScale(); + scale[variables[0]] = bucket0->varianceScale(); + scale[variables[1]] = bucket1->varianceScale(); + maths_t::setCountVarianceScale(scale, weight); for (std::size_t j = 0u; j < data[0]->s_InfluenceValues.size(); ++j) { for (const auto& influenceValue : data[0]->s_InfluenceValues[j]) { TStrCRef influence = influenceValue.first; @@ -682,14 +677,13 @@ void CMetricModel::fill(model_t::EFeature feature, count[0] = this->currentBucketCount(correlates[i][0], bucketTime); count[1] = this->currentBucketCount(correlates[i][1], bucketTime); params.s_ComputeProbabilityParams - .addBucketEmpty(TBool2Vec{!count[0] || *count[0] == 0, - !count[1] || *count[1] == 0}) + 
.addBucketEmpty({!count[0] || *count[0] == 0, !count[1] || *count[1] == 0}) // new line .addWeights(weight); } if (interim && model_t::requiresInterimResultAdjustment(feature)) { core_t::TTime time{bucketTime + bucketLength / 2}; TDouble2Vec1Vec modes(params.s_Model->correlateModes( - time, PROBABILITY_WEIGHT_STYLES, params.s_ComputeProbabilityParams.weights())); + time, params.s_ComputeProbabilityParams.weights())); for (std::size_t i = 0u; i < modes.size(); ++i) { if (!params.s_Values.empty()) { TDouble2Vec value_{params.s_Values[i][0], params.s_Values[i][1]}; @@ -700,7 +694,7 @@ void CMetricModel::fill(model_t::EFeature feature, } this->currentBucketInterimCorrections().emplace( core::make_triple(feature, pid, params.s_Correlated[i]), - TDouble1Vec(1, correction[params.s_Variables[i][0]])); + TDouble1Vec{correction[params.s_Variables[i][0]]}); } } } diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc index 85c86421ae..532fef4441 100644 --- a/lib/model/CMetricPopulationModel.cc +++ b/lib/model/CMetricPopulationModel.cc @@ -55,9 +55,6 @@ namespace { using TDouble2Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble2Vec4VecVec = std::vector; -using TBool2Vec = core::CSmallVector; using TTime2Vec = core::CSmallVector; using TOptionalSample = boost::optional; using TSizeSizePrFeatureDataPrVec = CMetricPopulationModel::TSizeSizePrFeatureDataPrVec; @@ -73,8 +70,8 @@ struct SValuesAndWeights { bool s_IsInteger, s_IsNonNegative; maths::CModel::TTimeDouble2VecSizeTrVec s_BucketValues; maths::CModel::TTimeDouble2VecSizeTrVec s_Values; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_TrendWeights; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec s_PriorWeights; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_TrendWeights; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_PriorWeights; }; using TSizeValuesAndWeightsUMap = boost::unordered_map; @@ -83,13 +80,6 @@ const std::string POPULATION_STATE_TAG("a"); const std::string FEATURE_MODELS_TAG("b"); const std::string FEATURE_CORRELATE_MODELS_TAG("c"); const std::string MEMORY_ESTIMATOR_TAG("d"); - -const maths_t::TWeightStyleVec SAMPLE_WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight, - maths_t::E_SampleCountVarianceScaleWeight}; -const maths_t::TWeightStyleVec PROBABILITY_WEIGHT_STYLES{ - maths_t::E_SampleSeasonalVarianceScaleWeight, maths_t::E_SampleCountVarianceScaleWeight}; - } // unnamed:: CMetricPopulationModel::CMetricPopulationModel( @@ -447,26 +437,26 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, : attribute.s_Values.size(); if (duplicate < attribute.s_Values.size()) { - std::for_each(attribute.s_TrendWeights[duplicate][0].begin(), - attribute.s_TrendWeights[duplicate][0].end(), - [countWeight, vs](double& weight) { - weight += countWeight / vs; - }); - std::for_each(attribute.s_PriorWeights[duplicate][0].begin(), - attribute.s_PriorWeights[duplicate][0].end(), - [countWeight](double& weight) { - weight += countWeight; - }); + maths_t::addCount(TDouble2Vec(dimension, countWeight / vs), + attribute.s_TrendWeights[duplicate]); + maths_t::addCount(TDouble2Vec(dimension, countWeight), + attribute.s_PriorWeights[duplicate]); } else { attribute.s_Values.emplace_back(sample.time(), value, pid); attribute.s_TrendWeights.push_back( - {TDouble2Vec(dimension, countWeight / vs), - model->winsorisationWeight(1.0, sample.time(), value), - TDouble2Vec(dimension, 
vs)}); + maths_t::CUnitWeights::unit(dimension)); attribute.s_PriorWeights.push_back( - {TDouble2Vec(dimension, countWeight), - model->winsorisationWeight(1.0, sample.time(), value), - TDouble2Vec(dimension, vs)}); + maths_t::CUnitWeights::unit(dimension)); + auto& trendWeight = attribute.s_TrendWeights.back(); + auto& priorWeight = attribute.s_PriorWeights.back(); + maths_t::setCount(TDouble2Vec(dimension, countWeight / vs), trendWeight); + maths_t::setWinsorisationWeight( + model->winsorisationWeight(1.0, sample.time(), value), trendWeight); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs), trendWeight); + maths_t::setCount(TDouble2Vec(dimension, countWeight), priorWeight); + maths_t::setWinsorisationWeight( + maths_t::winsorisationWeight(trendWeight), priorWeight); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, vs), priorWeight); } } } @@ -482,7 +472,6 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, params.integer(attribute.second.s_IsInteger) .nonNegative(attribute.second.s_IsNonNegative) .propagationInterval(this->propagationTime(cid, latest)) - .weightStyles(SAMPLE_WEIGHT_STYLES) .trendWeights(attribute.second.s_TrendWeights) .priorWeights(attribute.second.s_PriorWeights); @@ -495,7 +484,7 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, } for (const auto& feature : m_FeatureCorrelatesModels) { - feature.s_Models->processSamples(SAMPLE_WEIGHT_STYLES); + feature.s_Models->processSamples(); } m_Probabilities.clear(); @@ -936,15 +925,17 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, core_t::TTime bucketTime, bool interim, CProbabilityAndInfluenceCalculator::SParams& params) const { + std::size_t dimension{model_t::dimension(feature)}; auto data = find(this->featureData(feature, bucketTime), pid, cid); const maths::CModel* model{this->model(feature, cid)}; const TOptionalSample& bucket{CDataGatherer::extractData(*data).s_BucketValue}; core_t::TTime time{model_t::sampleTime(feature, bucketTime, this->bucketLength(), bucket->time())}; - TDouble2Vec4Vec weights{ - model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), - TDouble2Vec(dimension, bucket->varianceScale())}; + maths_t::TDouble2VecWeightsAry weights(maths_t::CUnitWeights::unit(dimension)); + maths_t::setSeasonalVarianceScale( + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), weights); + maths_t::setCountVarianceScale(TDouble2Vec(dimension, bucket->varianceScale()), weights); params.s_Feature = feature; params.s_Model = model; @@ -952,7 +943,7 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, params.s_Time.assign(1, TTime2Vec{time}); params.s_Value.assign(1, bucket->value()); if (interim && model_t::requiresInterimResultAdjustment(feature)) { - TDouble2Vec mode(params.s_Model->mode(time, PROBABILITY_WEIGHT_STYLES, weights)); + TDouble2Vec mode(params.s_Model->mode(time, weights)); TDouble2Vec correction(this->interimValueCorrector().corrections( time, this->currentBucketTotalCount(), mode, bucket->value(dimension))); params.s_Value[0] += correction; @@ -960,10 +951,10 @@ void CMetricPopulationModel::fill(model_t::EFeature feature, CCorrectionKey(feature, pid, cid), correction); } params.s_Count = 1.0; - params.s_ComputeProbabilityParams.tag(pid) + params.s_ComputeProbabilityParams + .tag(pid) // new line .addCalculation(model_t::probabilityCalculation(feature)) - .weightStyles(PROBABILITY_WEIGHT_STYLES) - .addBucketEmpty(TBool2Vec(1, false)) + .addBucketEmpty({false}) .addWeights(weights); } diff --git 
a/lib/model/CModelDetailsView.cc b/lib/model/CModelDetailsView.cc index 760041e1c8..69bc74ec91 100644 --- a/lib/model/CModelDetailsView.cc +++ b/lib/model/CModelDetailsView.cc @@ -29,8 +29,6 @@ namespace ml { namespace model { namespace { -const maths_t::TWeightStyleVec WEIGHT_STYLES{maths_t::E_SampleSeasonalVarianceScaleWeight, - maths_t::E_SampleCountVarianceScaleWeight}; const std::string EMPTY_STRING(""); } @@ -85,7 +83,6 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, using TDouble1VecDouble1VecPr = std::pair; using TDouble2Vec = core::CSmallVector; using TDouble2Vec3Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; if (this->isByFieldIdActive(byFieldId)) { const maths::CModel* model = this->model(feature, byFieldId); @@ -95,16 +92,18 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, std::size_t dimension = model_t::dimension(feature); - TDouble2Vec4Vec weights(WEIGHT_STYLES.size()); - weights[0] = model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time); - weights[1].assign(dimension, this->countVarianceScale(feature, byFieldId, time)); + maths_t::TDouble2VecWeightsAry weights( + maths_t::CUnitWeights::unit(dimension)); + maths_t::setSeasonalVarianceScale( + model->seasonalWeight(maths::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL, time), weights); + maths_t::setCountVarianceScale( + TDouble2Vec(dimension, this->countVarianceScale(feature, byFieldId, time)), weights); TDouble1VecDouble1VecPr support(model_t::support(feature)); TDouble2Vec supportLower(support.first); TDouble2Vec supportUpper(support.second); - TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, - WEIGHT_STYLES, weights)); + TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, weights)); if (interval.size() == 3) { TDouble2Vec lower = maths::CTools::truncate(interval[0], supportLower, supportUpper); diff --git a/lib/model/CModelParams.cc b/lib/model/CModelParams.cc index 6fc3d0125a..1f7db6654a 100644 --- a/lib/model/CModelParams.cc +++ b/lib/model/CModelParams.cc @@ -70,8 +70,7 @@ void SModelParams::configureLatency(core_t::TTime latency, core_t::TTime bucketL s_SampleCountFactor = CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY; if (s_LatencyBuckets > 50) { LOG_WARN(<< "There are a large number of buckets in the latency window. 
" - "Please ensure sufficient resources are available for this " - "job."); + "Please ensure sufficient resources are available for this job."); } } } diff --git a/lib/model/CModelTools.cc b/lib/model/CModelTools.cc index 45177966db..aac52a4490 100644 --- a/lib/model/CModelTools.cc +++ b/lib/model/CModelTools.cc @@ -342,8 +342,7 @@ void CModelTools::CProbabilityCache::addModes(model_t::EFeature feature, TDouble1Vec& modes{m_Caches[{feature, id}].s_Modes}; if (modes.empty()) { TDouble2Vec1Vec modes_( - model.residualModes(maths::CConstantWeights::COUNT_VARIANCE, - maths::CConstantWeights::unit(1))); + model.residualModes(maths_t::CUnitWeights::unit(1))); for (const auto& mode : modes_) { modes.push_back(mode[0]); } diff --git a/lib/model/CProbabilityAndInfluenceCalculator.cc b/lib/model/CProbabilityAndInfluenceCalculator.cc index 6e10ff5fa2..4108834ef6 100644 --- a/lib/model/CProbabilityAndInfluenceCalculator.cc +++ b/lib/model/CProbabilityAndInfluenceCalculator.cc @@ -36,9 +36,7 @@ using TSize1Vec = CProbabilityAndInfluenceCalculator::TSize1Vec; using TSize2Vec = CProbabilityAndInfluenceCalculator::TSize2Vec; using TDouble1Vec = CProbabilityAndInfluenceCalculator::TDouble1Vec; using TDouble2Vec = CProbabilityAndInfluenceCalculator::TDouble2Vec; -using TDouble4Vec = CProbabilityAndInfluenceCalculator::TDouble4Vec; using TDouble2Vec1Vec = CProbabilityAndInfluenceCalculator::TDouble2Vec1Vec; -using TDouble4Vec1Vec = CProbabilityAndInfluenceCalculator::TDouble4Vec1Vec; using TDouble1VecDoublePr = CProbabilityAndInfluenceCalculator::TDouble1VecDoublePr; using TBool2Vec = CProbabilityAndInfluenceCalculator::TBool2Vec; using TTime2Vec = CProbabilityAndInfluenceCalculator::TTime2Vec; @@ -57,8 +55,6 @@ using TStoredStringPtrStoredStringPtrPrDoublePrVec = CProbabilityAndInfluenceCalculator::TStoredStringPtrStoredStringPtrPrDoublePrVec; using TTail2Vec = core::CSmallVector; using TProbabilityCalculation2Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble2Vec4Vec1Vec = core::CSmallVector; using TSizeDoublePr = std::pair; using TSizeDoublePr1Vec = core::CSmallVector; @@ -167,10 +163,10 @@ class CValueDifference { double ni, maths::CModelProbabilityParams& params, TDouble2Vec& difference) const { - params.addBucketEmpty(TBool2Vec{n == ni}); for (std::size_t i = 0u; i < v.size(); ++i) { difference[i] = v[i] - vi[i]; } + params.addBucketEmpty({n == ni}); } //! Correlates. @@ -199,10 +195,10 @@ class CValueIntersection { double ni, maths::CModelProbabilityParams& params, TDouble2Vec& intersection) const { - params.addBucketEmpty(TBool2Vec{ni == 0}); for (std::size_t i = 0u; i < vi.size(); ++i) { intersection[i] = vi[i]; } + params.addBucketEmpty({ni == 0}); } //! Correlates. @@ -231,18 +227,14 @@ class CMeanDifference { double ni, maths::CModelProbabilityParams& params, TDouble2Vec& difference) const { - params.addBucketEmpty(TBool2Vec{n == ni}); for (std::size_t d = 0u; d < v.size(); ++d) { - for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) { - if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) { - params.weights()[0][i][d] *= n / (n - ni); - break; - } - } difference[d] = maths::CBasicStatistics::mean( maths::CBasicStatistics::accumulator(n, v[d]) - maths::CBasicStatistics::accumulator(ni, vi[d])); } + maths_t::multiplyCountVarianceScale(TDouble2Vec(v.size(), n / (n - ni)), + params.weights()[0]); + params.addBucketEmpty({n == ni}); } //! Correlates. 
@@ -255,16 +247,13 @@ class CMeanDifference { TBool2Vec bucketEmpty(2); for (std::size_t d = 0u; d < 2; ++d) { bucketEmpty[d] = ((n[d] - ni[d]) == 0); - for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) { - if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) { - params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]); - break; - } - } difference[d] = maths::CBasicStatistics::mean( maths::CBasicStatistics::accumulator(n[d], v[d]) - maths::CBasicStatistics::accumulator(ni[d], vi[d])); } + maths_t::multiplyCountVarianceScale( + TDouble2Vec{n[0] / (n[0] - ni[0]), n[1] / (n[1] - ni[1])}, + params.weights()[0]); params.addBucketEmpty(bucketEmpty); } }; @@ -280,18 +269,14 @@ class CVarianceDifference { maths::CModelProbabilityParams& params, TDouble2Vec& difference) const { std::size_t dimension = v.size() / 2; - params.addBucketEmpty(TBool2Vec{n == ni}); for (std::size_t d = 0u; d < dimension; ++d) { - for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) { - if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) { - params.weights()[0][i][d] *= n / (n - ni); - break; - } - } difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance( maths::CBasicStatistics::accumulator(n, v[dimension + d], v[d]) - maths::CBasicStatistics::accumulator(ni, vi[dimension + d], vi[d])); } + maths_t::multiplyCountVarianceScale(TDouble2Vec(dimension, n / (n - ni)), + params.weights()[0]); + params.addBucketEmpty({n == ni}); } //! Correlates. @@ -304,17 +289,14 @@ class CVarianceDifference { TBool2Vec bucketEmpty(2); for (std::size_t d = 0u; d < 2; ++d) { bucketEmpty[d] = ((n[d] - ni[d]) == 0); - for (std::size_t i = 0u; i < params.weightStyles().size(); ++i) { - if (params.weightStyles()[i] == maths_t::E_SampleCountVarianceScaleWeight) { - params.weights()[0][i][d] *= n[d] / (n[d] - ni[d]); - break; - } - } difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance( maths::CBasicStatistics::accumulator(n[d], v[2 + d], v[d]) - maths::CBasicStatistics::accumulator(ni[d], vi[2 + d], vi[d])); } params.addBucketEmpty(bucketEmpty); + maths_t::multiplyCountVarianceScale( + TDouble2Vec{n[0] / (n[0] - ni[0]), n[1] / (n[1] - ni[1])}, + params.weights()[0]); } }; @@ -393,7 +375,7 @@ void doComputeInfluences(model_t::EFeature feature, TSize1Vec mostAnomalousCorrelate; double logp = maths::CTools::fastLog(probability); - TDouble2Vec4Vec1Vec weights(params.weights()); + maths_t::TDouble2VecWeightsAry1Vec weights(params.weights()); for (auto i = influencerValues.begin(); i != influencerValues.end(); ++i) { params.weights(weights).updateAnomalyModel(false); @@ -486,7 +468,7 @@ void doComputeCorrelateInfluences(model_t::EFeature feature, TSize1Vec mostAnomalousCorrelate; double logp = std::log(probability); - TDouble2Vec4Vec1Vec weights(params.weights()); + maths_t::TDouble2VecWeightsAry1Vec weights(params.weights()); for (const auto& influence_ : influencerValues) { params.weights(weights).updateAnomalyModel(false); @@ -647,7 +629,7 @@ bool CProbabilityAndInfluenceCalculator::addProbability(model_t::EFeature featur return false; } - // Maybe check the cache. + // Check the cache. 
if (!model_t::isConstant(feature) && m_ProbabilityCache) { TDouble2Vec1Vec values(model_t::stripExtraStatistics(feature, values_)); model.detrend(time, params.seasonalConfidenceInterval(), values); @@ -905,7 +887,6 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TParams& pa params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[0]); TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; @@ -934,7 +915,6 @@ void CLogProbabilityComplementInfluenceCalculator::computeInfluences(TCorrelateP params_.addCalculation(maths_t::E_OneSidedAbove) .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) .mostAnomalousCorrelate(correlate); LOG_TRACE(<< "influencerValues = " @@ -984,7 +964,6 @@ void CLogProbabilityInfluenceCalculator::computeInfluences(TParams& params) cons params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[0]); TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; @@ -1015,7 +994,6 @@ void CLogProbabilityInfluenceCalculator::computeInfluences(TCorrelateParams& par params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) .mostAnomalousCorrelate(correlate); LOG_TRACE(<< "influencerValues = " @@ -1043,7 +1021,6 @@ void CMeanInfluenceCalculator::computeInfluences(TParams& params) const { params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[0]); TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; @@ -1074,7 +1051,6 @@ void CMeanInfluenceCalculator::computeInfluences(TCorrelateParams& params) const params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) .mostAnomalousCorrelate(correlate); LOG_TRACE(<< "influencerValues = " @@ -1102,7 +1078,6 @@ void CVarianceInfluenceCalculator::computeInfluences(TParams& params) const { params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[0]); TStrCRefDouble1VecDoublePrPrVec& influencerValues = params.s_InfluencerValues; @@ -1134,7 +1109,6 @@ void CVarianceInfluenceCalculator::computeInfluences(TCorrelateParams& params) c params_ .seasonalConfidenceInterval( params.s_ComputeProbabilityParams.seasonalConfidenceInterval()) - .weightStyles(params.s_ComputeProbabilityParams.weightStyles()) .addWeights(params.s_ComputeProbabilityParams.weights()[correlate]) .mostAnomalousCorrelate(correlate); LOG_TRACE(<< "influencerValues = " diff --git a/lib/model/CRuleCondition.cc 
b/lib/model/CRuleCondition.cc index 7001fb8c9a..f0636faf9d 100644 --- a/lib/model/CRuleCondition.cc +++ b/lib/model/CRuleCondition.cc @@ -147,8 +147,7 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model, switch (m_Type) { case E_CategoricalMatch: case E_CategoricalComplement: { - LOG_ERROR(<< "Should never check numerical condition for categorical " - "rule condition"); + LOG_ERROR(<< "Should never check numerical condition for categorical rule condition"); return false; } case E_NumericalActual: { @@ -173,8 +172,7 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model, return false; } if (value.size() != typical.size()) { - LOG_ERROR(<< "Cannot apply rule condition: cannot calculate difference " - "between " + LOG_ERROR(<< "Cannot apply rule condition: cannot calculate difference between " << "actual and typical values due to different dimensions."); return false; } diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 10cfd1bd58..9fc6fbb844 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -565,7 +565,7 @@ unsigned minimumSampleCount(EFeature feature) { case E_IndividualHighMedianByPerson: return 1; - // Population variance needs a minimum population size + // Population variance needs a minimum population size case E_IndividualVarianceByPerson: case E_IndividualLowVarianceByPerson: case E_IndividualHighVarianceByPerson: @@ -592,7 +592,7 @@ unsigned minimumSampleCount(EFeature feature) { case E_PopulationSumVelocityByPersonAndAttribute: return 1; - // Population variance needs a minimum population size + // Population variance needs a minimum population size case E_PopulationVarianceByPersonAndAttribute: case E_PopulationLowVarianceByPersonAndAttribute: case E_PopulationHighVarianceByPersonAndAttribute: @@ -1708,20 +1708,15 @@ std::string print(EFeature feature) { case E_PopulationHighUniqueCountByBucketPersonAndAttribute: return "'high unique count per bucket by person and attribute'"; case E_PopulationLowCountsByBucketPersonAndAttribute: - return "'low values of non-zero count per bucket by person and " - "attribute'"; + return "'low values of non-zero count per bucket by person and attribute'"; case E_PopulationHighCountsByBucketPersonAndAttribute: - return "'high values of non-zero count per bucket by person and " - "attribute'"; + return "'high values of non-zero count per bucket by person and attribute'"; case E_PopulationInfoContentByBucketPersonAndAttribute: - return "'information content of value per bucket by person and " - "attribute'"; + return "'information content of value per bucket by person and attribute'"; case E_PopulationLowInfoContentByBucketPersonAndAttribute: - return "'low information content of value per bucket by person and " - "attribute'"; + return "'low information content of value per bucket by person and attribute'"; case E_PopulationHighInfoContentByBucketPersonAndAttribute: - return "'high information content of value per bucket by person and " - "attribute'"; + return "'high information content of value per bucket by person and attribute'"; case E_PopulationTimeOfDayByBucketPersonAndAttribute: return "'time-of-day per bucket by person and attribute'"; case E_PopulationTimeOfWeekByBucketPersonAndAttribute: @@ -1773,23 +1768,17 @@ std::string print(EFeature feature) { case E_PeersLowUniqueCountByBucketPersonAndAttribute: return "'low unique count per bucket by peers of person and attribute'"; case E_PeersHighUniqueCountByBucketPersonAndAttribute: - return "'high unique count per 
bucket by peers of person and " - "attribute'"; + return "'high unique count per bucket by peers of person and attribute'"; case E_PeersLowCountsByBucketPersonAndAttribute: - return "'low values of non-zero count per bucket by peers of person " - "and attribute'"; + return "'low values of non-zero count per bucket by peers of person and attribute'"; case E_PeersHighCountsByBucketPersonAndAttribute: - return "'high values of non-zero count per bucket by peers of person " - "and attribute'"; + return "'high values of non-zero count per bucket by peers of person and attribute'"; case E_PeersInfoContentByBucketPersonAndAttribute: - return "'information content of value per bucket by peers of person " - "and attribute'"; + return "'information content of value per bucket by peers of person and attribute'"; case E_PeersLowInfoContentByBucketPersonAndAttribute: - return "'low information content of value per bucket by peers of " - "person and attribute'"; + return "'low information content of value per bucket by peers of person and attribute'"; case E_PeersHighInfoContentByBucketPersonAndAttribute: - return "'high information content of value per bucket by peers of " - "person and attribute'"; + return "'high information content of value per bucket by peers of person and attribute'"; case E_PeersTimeOfDayByBucketPersonAndAttribute: return "'time-of-day per bucket by peers of person and attribute'"; case E_PeersTimeOfWeekByBucketPersonAndAttribute: diff --git a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc index d5e3438588..a929a3b52f 100644 --- a/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc +++ b/lib/model/unittest/CAnnotatedProbabilityBuilderTest.cc @@ -146,16 +146,16 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(i, static_cast(i)); - TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT); - attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(i, maths_t::CUnitWeights::UNIT); + attributePrior.addSamples(samples, weights); } maths::CMultinomialConjugate personAttributePrior( maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(2 * i, static_cast(i)); - TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT); - personAttributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(2 * i, maths_t::CUnitWeights::UNIT); + personAttributePrior.addSamples(samples, weights); } SAnnotatedProbability result; @@ -226,16 +226,16 @@ void CAnnotatedProbabilityBuilderTest::testAddAttributeProbabilityGivenPopulatio maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(i, static_cast(i)); - TDouble4Vec1Vec weights(i, maths::CConstantWeights::UNIT); - attributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(i, maths_t::CUnitWeights::UNIT); + attributePrior.addSamples(samples, weights); } maths::CMultinomialConjugate personAttributePrior( maths::CMultinomialConjugate::nonInformativePrior(4u)); for (std::size_t i = 1u; i <= 4u; ++i) { TDouble1Vec samples(2 * i, static_cast(i)); - TDouble4Vec1Vec weights(2 * i, maths::CConstantWeights::UNIT); - 
personAttributePrior.addSamples(maths::CConstantWeights::COUNT, samples, weights); + maths_t::TDoubleWeightsAry1Vec weights(2 * i, maths_t::CUnitWeights::UNIT); + personAttributePrior.addSamples(samples, weights); } SAnnotatedProbability result; diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 4608497068..1b870eb78e 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -318,8 +318,6 @@ void testModelWithValueField(model_t::EFeature feature, } } -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const TDoubleVecVec UNIT_WEIGHT(1, TDoubleVec(1, 1.0)); const TSizeDoublePr1Vec NO_CORRELATES; } // unnamed:: @@ -342,8 +340,8 @@ void CEventRateModelTest::testOnlineCountSample() { TMathsModelPtr timeseriesModel{factory.defaultFeatureModel( model_t::E_IndividualCountByBucketAndPerson, bucketLength, 0.4, true)}; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{ - maths::CConstantWeights::unit(1)}; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights{ + maths_t::CUnitWeights::unit(1)}; // Generate some events. TTimeVec eventTimes; @@ -372,7 +370,6 @@ void CEventRateModelTest::testOnlineCountSample() { params_.integer(true) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); double sample{static_cast(expectedEventCounts[j])}; @@ -445,8 +442,8 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() { TMathsModelPtr timeseriesModel{factory.defaultFeatureModel( model_t::E_IndividualNonZeroCountByBucketAndPerson, bucketLength, 0.4, true)}; - maths::CModelAddSamplesParams::TDouble2Vec4VecVec weights{ - maths::CConstantWeights::unit(1)}; + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights{ + maths_t::CUnitWeights::unit(1)}; // Generate some events. 
TTimeVec eventTimes; @@ -476,7 +473,6 @@ void CEventRateModelTest::testOnlineNonZeroCountSample() { params_.integer(true) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) .trendWeights(weights) .priorWeights(weights); double sample{static_cast(model_t::offsetCountToZero( @@ -2709,7 +2705,7 @@ void CEventRateModelTest::testDecayRateControl() { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - params.s_MinimumModeFraction = model::CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION; + params.s_MinimumModeFraction = CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION; test::CRandomNumbers rng; @@ -3037,10 +3033,12 @@ CppUnit::Test* CEventRateModelTest::suite() { "CEventRateModelTest::testOnlineProbabilityCalculation", &CEventRateModelTest::testOnlineProbabilityCalculation)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount", + "CEventRateModelTest::" + "testOnlineProbabilityCalculationForLowNonZeroCount", &CEventRateModelTest::testOnlineProbabilityCalculationForLowNonZeroCount)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount", + "CEventRateModelTest::" + "testOnlineProbabilityCalculationForHighNonZeroCount", &CEventRateModelTest::testOnlineProbabilityCalculationForHighNonZeroCount)); suiteOfTests->addTest(new CppUnit::TestCaller( "CEventRateModelTest::testOnlineCorrelatedNoTrend", @@ -3059,7 +3057,8 @@ CppUnit::Test* CEventRateModelTest::suite() { "CEventRateModelTest::testCountProbabilityCalculationWithInfluence", &CEventRateModelTest::testCountProbabilityCalculationWithInfluence)); suiteOfTests->addTest(new CppUnit::TestCaller( - "CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence", + "CEventRateModelTest::" + "testDistinctCountProbabilityCalculationWithInfluence", &CEventRateModelTest::testDistinctCountProbabilityCalculationWithInfluence)); suiteOfTests->addTest(new CppUnit::TestCaller( "CEventRateModelTest::testOnlineRareWithInfluence", diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc index a1b9a8dafe..0dd384695b 100644 --- a/lib/model/unittest/CEventRatePopulationModelTest.cc +++ b/lib/model/unittest/CEventRatePopulationModelTest.cc @@ -334,8 +334,8 @@ void CEventRatePopulationModelTest::testFeatures() { using TDouble2Vec = core::CSmallVector; using TDouble2VecVec = std::vector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAryVec = std::vector; using TSizeSet = std::set; using TSizeSizeSetMap = std::map; using TStrStrPr = std::pair; @@ -346,12 +346,10 @@ void CEventRatePopulationModelTest::testFeatures() { using TSizeSizePrUInt64Map = std::map; using TMathsModelPtr = std::shared_ptr; using TSizeMathsModelPtrMap = std::map; - using TDouble2VecVecDouble2Vec4VecVecPr = std::pair; - using TSizeDouble2VecVecDouble2Vec4VecVecPrMap = - std::map; - - static const maths_t::TWeightStyleVec WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; + using TDouble2VecVecDouble2VecWeightsAryVecPr = + std::pair; + using TSizeDouble2VecVecDouble2VecWeightsAryVecPrMap = + std::map; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -406,7 +404,7 @@ void 
CEventRatePopulationModelTest::testFeatures() { expectedNonZeroCounts[{pid, cid}] = count.second; } - TSizeDouble2VecVecDouble2Vec4VecVecPrMap populationSamples; + TSizeDouble2VecVecDouble2VecWeightsAryVecPrMap populationSamples; for (const auto& count_ : expectedNonZeroCounts) { std::size_t pid = count_.first.first; std::size_t cid = count_.first.second; @@ -420,14 +418,17 @@ void CEventRatePopulationModelTest::testFeatures() { } TDoubleVec sample(1, count); - TDouble2Vec4Vec weight{{model->sampleRateWeight(pid, cid)}, - model_->winsorisationWeight(1.0, time, sample)}; + TDouble2VecWeightsAry weight( + maths_t::CUnitWeights::unit(1)); + maths_t::setCount(TDouble2Vec{model->sampleRateWeight(pid, cid)}, weight); + maths_t::setWinsorisationWeight( + model_->winsorisationWeight(1.0, time, sample), weight); populationSamples[cid].first.push_back({sample[0]}); populationSamples[cid].second.push_back(weight); } for (auto& samples_ : populationSamples) { std::size_t cid = samples_.first; - TDouble2Vec4VecVec& weights = samples_.second.second; + TDouble2VecWeightsAryVec& weights = samples_.second.second; maths::COrderings::simultaneousSort(samples_.second.first, weights); maths::CModel::TTimeDouble2VecSizeTrVec samples; for (const auto& sample : samples_.second.first) { @@ -437,7 +438,6 @@ void CEventRatePopulationModelTest::testFeatures() { params_.integer(true) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(WEIGHT_STYLES) .trendWeights(weights) .priorWeights(weights); expectedPopulationModels[cid]->addSamples(params_, samples); @@ -1446,9 +1446,8 @@ void CEventRatePopulationModelTest::testIgnoreSamplingGivenDetectionRules() { // Checksums will be different because a model is created for attribute a3 CPPUNIT_ASSERT(modelWithSkip->checksum() != modelNoSkip->checksum()); - CAnomalyDetectorModel::CModelDetailsViewPtr modelWithSkipView = - modelWithSkip->details(); - CAnomalyDetectorModel::CModelDetailsViewPtr modelNoSkipView = modelNoSkip->details(); + auto modelWithSkipView = modelWithSkip->details(); + auto modelNoSkipView = modelNoSkip->details(); // but the underlying models for attributes a1 and a2 are the same uint64_t withSkipChecksum = diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index 04a192bcb4..b3e8a015c9 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -77,9 +77,8 @@ using TOptionalStr = boost::optional; using TTimeDoublePr = std::pair; using TOptionalTimeDoublePr = boost::optional; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; -using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsStack; -using TMaxAccumulator = - maths::CBasicStatistics::COrderStatisticsStack>; +using TMinAccumulator = maths::CBasicStatistics::SMin::TAccumulator; +using TMaxAccumulator = maths::CBasicStatistics::SMax::TAccumulator; using TMathsModelPtr = std::shared_ptr; using TPriorPtr = std::shared_ptr; using TMultivariatePriorPtr = std::shared_ptr; @@ -281,8 +280,6 @@ void processBucket(core_t::TTime time, partitioningFields, 1, probability2); } -const maths_t::TWeightStyleVec COUNT_WEIGHT(1, maths_t::E_SampleCountWeight); -const TDouble4Vec1Vec UNIT_WEIGHT(1, TDouble4Vec(1, 1.0)); const TSizeDoublePr1Vec NO_CORRELATES; } @@ -396,13 +393,12 @@ void CMetricModelTest::testSample() { << core::CContainerPrinter::print(expectedMinSamples) << ", max samples = " << core::CContainerPrinter::print(expectedMaxSamples)); - maths::CModelAddSamplesParams::TDouble2Vec4VecVec 
weights( - numberSamples, maths::CConstantWeights::unit(1)); + maths::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights( + numberSamples, maths_t::CUnitWeights::unit(1)); maths::CModelAddSamplesParams params_; params_.integer(false) .nonNegative(true) .propagationInterval(1.0) - .weightStyles(COUNT_WEIGHT) .trendWeights(weights) .priorWeights(weights); @@ -543,7 +539,6 @@ void CMetricModelTest::testSample() { void CMetricModelTest::testMultivariateSample() { LOG_DEBUG(<< "*** testMultivariateSample ***"); - using TDoubleVecVecVec = std::vector; using TVector2 = maths::CVectorNx1; using TMean2Accumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TTimeDouble2AryPr = std::pair>; @@ -562,8 +557,8 @@ void CMetricModelTest::testMultivariateSample() { {202, 1.0, 0.7}, {204, 1.5, 1.8}}; TTimeDouble2AryPrVec data; for (std::size_t i = 0u; i < boost::size(data_); ++i) { - boost::array values = {{data_[i][1], data_[i][2]}}; - data.push_back(TTimeDouble2AryPr(static_cast(data_[i][0]), values)); + boost::array value = {{data_[i][1], data_[i][2]}}; + data.emplace_back(static_cast(data_[i][0]), value); } unsigned int sampleCounts[] = {2u, 1u}; @@ -590,7 +585,7 @@ void CMetricModelTest::testMultivariateSample() { TMean2Accumulator expectedLatLongSample; std::size_t numberSamples = 0; TDoubleVecVec expectedLatLongSamples; - TMultivariatePriorPtr expectedMeanPrior = + TMultivariatePriorPtr expectedPrior = factory.defaultMultivariatePrior(model_t::E_IndividualMeanLatLongByPerson); std::size_t j = 0; @@ -626,53 +621,54 @@ void CMetricModelTest::testMultivariateSample() { expectedLatLongSamples.end()); LOG_DEBUG(<< "Adding mean samples = " << core::CContainerPrinter::print(expectedLatLongSamples)); - expectedMeanPrior->dataType(maths_t::E_ContinuousData); - expectedMeanPrior->addSamples( - COUNT_WEIGHT, expectedLatLongSamples, - TDoubleVecVecVec(expectedLatLongSamples.size(), - TDoubleVecVec(1, TDoubleVec(2, 1.0)))); + expectedPrior->dataType(maths_t::E_ContinuousData); + expectedPrior->addSamples( + expectedLatLongSamples, + maths_t::TDouble10VecWeightsAry1Vec( + expectedLatLongSamples.size(), + maths_t::CUnitWeights::unit(2))); numberSamples = 0u; expectedLatLongSamples.clear(); } model_t::CResultType type(model_t::CResultType::E_Unconditional | model_t::CResultType::E_Final); - TOptionalUInt64 currentCount = model.currentBucketCount(0, time); + TOptionalUInt64 count = model.currentBucketCount(0, time); TDouble1Vec bucketLatLong = model.currentBucketValue( model_t::E_IndividualMeanLatLongByPerson, 0, 0, time); TDouble1Vec baselineLatLong = model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson, 0, 0, type, NO_CORRELATES, time); + TDouble1Vec featureLatLong = multivariateFeatureData( + model, model_t::E_IndividualMeanLatLongByPerson, 0, time); + const auto& prior = + dynamic_cast( + model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0)) + ->prior(); - LOG_DEBUG(<< "bucket count = " - << core::CContainerPrinter::print(currentCount)); - LOG_DEBUG(<< "current bucket mean = " - << core::CContainerPrinter::print(bucketLatLong) << ", expected baseline bucket mean = " - << maths::CBasicStatistics::mean(expectedBaselineLatLong) << ", baseline bucket mean = " + LOG_DEBUG(<< "bucket count = " << core::CContainerPrinter::print(count)); + LOG_DEBUG(<< "current = " << core::CContainerPrinter::print(bucketLatLong)); + LOG_DEBUG(<< ", expected baseline = " + << maths::CBasicStatistics::mean(expectedBaselineLatLong)); + LOG_DEBUG(<< ", actual baseline = " << 
core::CContainerPrinter::print(baselineLatLong)); - CPPUNIT_ASSERT(currentCount); - CPPUNIT_ASSERT_EQUAL(expectedCount, *currentCount); + CPPUNIT_ASSERT(count); + CPPUNIT_ASSERT_EQUAL(expectedCount, *count); TDouble1Vec latLong; if (maths::CBasicStatistics::count(expectedLatLong) > 0.0) { latLong.push_back(maths::CBasicStatistics::mean(expectedLatLong)(0)); latLong.push_back(maths::CBasicStatistics::mean(expectedLatLong)(1)); } - CPPUNIT_ASSERT(latLong == bucketLatLong); + CPPUNIT_ASSERT_EQUAL(latLong, bucketLatLong); if (!baselineLatLong.empty()) { baselineLatLongError.add(maths::fabs( TVector2(baselineLatLong) - maths::CBasicStatistics::mean(expectedBaselineLatLong))); } - CPPUNIT_ASSERT(latLong == multivariateFeatureData(model, model_t::E_IndividualMeanLatLongByPerson, - 0, time)); - CPPUNIT_ASSERT_EQUAL( - expectedMeanPrior->checksum(), - dynamic_cast( - model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0)) - ->prior() - .checksum()); + CPPUNIT_ASSERT_EQUAL(latLong, featureLatLong); + CPPUNIT_ASSERT_EQUAL(expectedPrior->checksum(), prior.checksum()); // Test persistence. (We check for idempotency.) std::string origXml; diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc index 684254dd5f..9f7cb21077 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.cc +++ b/lib/model/unittest/CMetricPopulationModelTest.cc @@ -452,23 +452,20 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { using TTimeDouble2VecSizeTr = core::CTriple; using TTimeDouble2VecSizeTrVec = std::vector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry; + using TDouble2VecWeightsAryVec = std::vector; using TSizeSizePrDoubleVecMap = std::map; using TSizeSizePrMeanAccumulatorUMap = std::map; using TSizeSizePrMinAccumulatorMap = std::map; using TSizeSizePrMaxAccumulatorMap = std::map; using TMathsModelPtr = std::shared_ptr; using TSizeMathsModelPtrMap = std::map; - using TTimeDouble2VecSizeTrVecDouble2Vec4VecVecPr = - std::pair; - using TSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMap = - std::map; - using TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap = - std::map; - - static const maths_t::TWeightStyleVec WEIGHT_STYLES{ - maths_t::E_SampleCountWeight, maths_t::E_SampleWinsorisationWeight}; + using TTimeDouble2VecSizeTrVecDouble2VecWeightsAryVecPr = + std::pair; + using TSizeTimeDouble2VecSizeTrVecDouble2VecWeightsAryVecPrMap = + std::map; + using TSizeSizeTimeDouble2VecSizeTrVecDouble2VecWeightAryVecPrMapMap = + std::map; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -512,15 +509,14 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { if (message.s_Time >= startTime + bucketLength) { model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - TSizeSizeTimeDouble2VecSizeTrVecDouble2Vec4VecVecPrMapMap populationWeightedSamples; + TSizeSizeTimeDouble2VecSizeTrVecDouble2VecWeightAryVecPrMapMap populationWeightedSamples; for (std::size_t feature = 0u; feature < features.size(); ++feature) { for (const auto& samples_ : expectedSamples[feature]) { std::size_t pid = samples_.first.first; std::size_t cid = samples_.first.second; - double weight = model->sampleRateWeight(pid, cid); TTimeDouble2VecSizeTrVec& samples = populationWeightedSamples[feature][cid].first; - TDouble2Vec4VecVec& weights = + TDouble2VecWeightsAryVec& weights = populationWeightedSamples[feature][cid].second; 
TMathsModelPtr& model_ = expectedPopulationModels[feature][cid]; if (!model_) { @@ -534,8 +530,12 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { expectedSampleTimes[{pid, cid}][j] + 0.5); TDouble2Vec sample{samples_.second[j]}; samples.emplace_back(time_, sample, pid); - weights.push_back( - {{weight}, model_->winsorisationWeight(1.0, time_, sample)}); + weights.push_back(maths_t::CUnitWeights::unit(1)); + auto& weight = weights.back(); + maths_t::setCount( + TDouble2Vec{model->sampleRateWeight(pid, cid)}, weight); + maths_t::setWinsorisationWeight( + model_->winsorisationWeight(1.0, time_, sample), weight); } } } @@ -543,13 +543,12 @@ void CMetricPopulationModelTest::testMinMaxAndMean() { for (auto& attribute : feature.second) { std::size_t cid = attribute.first; TTimeDouble2VecSizeTrVec& samples = attribute.second.first; - TDouble2Vec4VecVec& weights = attribute.second.second; + TDouble2VecWeightsAryVec& weights = attribute.second.second; maths::COrderings::simultaneousSort(samples, weights); maths::CModelAddSamplesParams params_; params_.integer(false) .nonNegative(nonNegative) .propagationInterval(1.0) - .weightStyles(WEIGHT_STYLES) .trendWeights(weights) .priorWeights(weights); expectedPopulationModels[feature.first][cid]->addSamples(params_, samples); diff --git a/lib/model/unittest/CModelToolsTest.cc b/lib/model/unittest/CModelToolsTest.cc index 6cc3b27e6a..394c746c86 100644 --- a/lib/model/unittest/CModelToolsTest.cc +++ b/lib/model/unittest/CModelToolsTest.cc @@ -198,8 +198,7 @@ void CModelToolsTest::testProbabilityCache() { using TTime2Vec = core::CSmallVector; using TTime2Vec1Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; - using TDouble2Vec4Vec = core::CSmallVector; - using TDouble2Vec4VecVec = std::vector; + using TDouble2VecWeightsAryVec = std::vector; using TTail2Vec = core::CSmallVector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; @@ -211,8 +210,7 @@ void CModelToolsTest::testProbabilityCache() { test::CRandomNumbers rng; core_t::TTime time_{0}; - TDouble2Vec4Vec weight{TDouble2Vec{1.0}}; - TDouble2Vec4VecVec weights{weight}; + TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; { TDoubleVec samples_[3]; @@ -226,11 +224,7 @@ void CModelToolsTest::testProbabilityCache() { rng.random_shuffle(samples.begin(), samples.end()); for (auto sample : samples) { maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); model.addSamples( params, {core::make_triple(time_, TDouble2Vec(1, sample), TAG)}); } @@ -266,8 +260,7 @@ void CModelToolsTest::testProbabilityCache() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); double expectedProbability; TTail2Vec expectedTail; bool conditional; @@ -309,8 +302,7 @@ void CModelToolsTest::testProbabilityCache() { params.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::COUNT) - .addWeights(weight); + .addWeights(weights[0]); double expectedProbability; TTail2Vec expectedTail; bool conditional; diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc 
b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc index a800e879f7..812fd4911d 100644 --- a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc +++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc @@ -47,11 +47,9 @@ using TTimeVec = std::vector; using TBool2Vec = core::CSmallVector; using TDouble1Vec = core::CSmallVector; using TDouble2Vec = core::CSmallVector; -using TDouble4Vec = core::CSmallVector; +using TDouble10Vec = core::CSmallVector; using TDouble2Vec1Vec = core::CSmallVector; -using TDouble2Vec4Vec = core::CSmallVector; -using TDouble4Vec1Vec = core::CSmallVector; -using TDouble2Vec4VecVec = std::vector; +using TDouble2VecWeightsAryVec = std::vector; using TSize1Vec = core::CSmallVector; using TSize10Vec = core::CSmallVector; using TTail2Vec = core::CSmallVector; @@ -77,17 +75,16 @@ using TStoredStringPtrStoredStringPtrPrDoublePrVec = using TInfluenceCalculatorCPtr = std::shared_ptr; TDouble1VecDoublePr make_pair(double first, double second) { - return TDouble1VecDoublePr{TDouble1Vec{first}, second}; + return TDouble1VecDoublePr{{first}, second}; } TDouble1VecDoublePr make_pair(double first1, double first2, double second) { - return TDouble1VecDoublePr{TDouble1Vec{first1, first2}, second}; + return TDouble1VecDoublePr{{first1, first2}, second}; } -//TDouble1VecDouble1VecPr make_pair(double first1, double first2, double second1, double second2) -//{ -// return TDouble1VecDouble1VecPr{TDouble1Vec{first1, first2}, TDouble1Vec{second1, second2}}; -//} +TDouble1VecDouble1VecPr make_pair(double first1, double first2, double second1, double second2) { + return TDouble1VecDouble1VecPr{{first1, first2}, {second1, second2}}; +} maths::CModelParams params(core_t::TTime bucketLength) { double learnRate{static_cast(bucketLength) / 1800.0}; @@ -112,17 +109,13 @@ TTimeDouble2VecSizeTr sample(core_t::TTime time, const TDoubleVec& sample) { template core_t::TTime addSamples(core_t::TTime bucketLength, const SAMPLES& samples, maths::CModel& model) { - TDouble2Vec4VecVec weights{ - maths::CConstantWeights::unit(dimension(samples[0]))}; + TDouble2VecWeightsAryVec weights{ + maths_t::CUnitWeights::unit(dimension(samples[0]))}; maths::CModelAddSamplesParams params; - params.integer(false) - .propagationInterval(1.0) - .weightStyles(maths::CConstantWeights::COUNT) - .trendWeights(weights) - .priorWeights(weights); + params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); core_t::TTime time{0}; for (const auto& sample_ : samples) { - model.addSamples(params, TTimeDouble2VecSizeTrVec{sample(time, sample_)}); + model.addSamples(params, {sample(time, sample_)}); time += bucketLength; } return time; @@ -134,16 +127,15 @@ void computeProbability(core_t::TTime time, const maths::CModel& model, double& probablity, TTail2Vec& tail) { - TDouble2Vec4Vec weight{model.seasonalWeight(0.0, time)}; + maths_t::TDouble2VecWeightsAry weight( + maths_t::CUnitWeights::unit(sample.size())); + maths_t::setSeasonalVarianceScale(model.seasonalWeight(0.0, time), weight); maths::CModelProbabilityParams params; - params.addCalculation(calculation) - .addBucketEmpty(TBool2Vec{false}) - .weightStyles(maths::CConstantWeights::SEASONAL_VARIANCE) - .addWeights(weight); + params.addCalculation(calculation).addBucketEmpty(TBool2Vec{false}).addWeights(weight); bool conditional; TSize1Vec mostAnomalousCorrelate; - model.probability(params, TTime2Vec1Vec{TTime2Vec{time}}, TDouble2Vec1Vec{sample}, - probablity, tail, conditional, mostAnomalousCorrelate); + 
model.probability(params, {{time}}, {sample}, probablity, tail, conditional, + mostAnomalousCorrelate); } const std::string I("I"); @@ -164,18 +156,16 @@ void computeInfluences(CALCULATOR& calculator, const std::string& influencerName, const TStrCRefDouble1VecDoublePrPrVec& influencerValues, TStoredStringPtrStoredStringPtrPrDoublePrVec& result) { - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); - TDouble2Vec4Vec weights{model.seasonalWeight(0.0, time), TDouble2Vec{1.0}}; + maths_t::TDouble2VecWeightsAry weight(maths_t::CUnitWeights::unit(1)); + maths_t::setSeasonalVarianceScale(model.seasonalWeight(0.0, time), weight); model::CProbabilityAndInfluenceCalculator::SParams params(partitioningFields); params.s_Feature = feature; params.s_Model = &model; params.s_Time = TTime2Vec1Vec{TTimeVec{time}}; params.s_Value = TDouble2Vec1Vec{TDoubleVec{value}}; params.s_Count = count; - params.s_ComputeProbabilityParams.weightStyles(weightStyles).addWeights(weights); + params.s_ComputeProbabilityParams.addWeights(weight); params.s_Probability = probability; params.s_Tail = tail; params.s_InfluencerName = model::CStringStore::influencers().get(influencerName); @@ -197,9 +187,6 @@ void computeInfluences(CALCULATOR& calculator, const std::string& influencerName, const TStrCRefDouble1VecDouble1VecPrPrVec& influencerValues, TStoredStringPtrStoredStringPtrPrDoublePrVec& result) { - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); TTime2Vec times_(&times[0], &times[2]); TDouble2Vec values_(&values[0], &values[2]); @@ -211,12 +198,8 @@ void computeInfluences(CALCULATOR& calculator, params.s_Times.push_back(times_); params.s_Values.push_back(values_); params.s_Counts.push_back(counts_); - params.s_ComputeProbabilityParams.weightStyles(weightStyles); - //for (auto &weight : weights) - //{ - // weight.resize(weightStyles.size(), TDouble2Vec(2, 1.0)); - // params.s_ComputeProbabilityParams.addWeights(weight); - //} + params.s_ComputeProbabilityParams.addWeights( + maths_t::CUnitWeights::unit(2)); params.s_Probability = probability; params.s_Tail = tail; params.s_MostAnomalousCorrelate.push_back(0); @@ -233,9 +216,6 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, const TDoubleVecVec& values, const TStrCRefDouble1VecDoublePrPrVecVec& influencerValues, TStoredStringPtrStoredStringPtrPrDoublePrVec& influences) { - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); model::CProbabilityAndInfluenceCalculator calculator(0.3); @@ -251,7 +231,10 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, std::size_t dimension{values[i].size() - 1}; TTime2Vec1Vec time{TTime2Vec{time_}}; TDouble2Vec1Vec value{TDouble2Vec(&values[i][0], &values[i][dimension])}; - TDouble2Vec4Vec weight(2, TDouble2Vec(dimension, values[i][dimension])); + maths_t::TDouble2VecWeightsAry weight( + maths_t::CUnitWeights::unit(dimension)); + maths_t::setSeasonalVarianceScale(TDouble2Vec(dimension, values[i][dimension]), weight);
+ maths_t::setCountVarianceScale(TDouble2Vec(dimension, values[i][dimension]), weight); double count{0.0}; for (const auto& influence : influencerValues[i]) { count += influence.second.second; @@ -261,7 +244,6 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature, params_.addCalculation(model_t::probabilityCalculation(feature)) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(weightStyles) .addWeights(weight); double p = 0.0; @@ -328,49 +310,47 @@ void CProbabilityAndInfluenceCalculatorTest::testInfluenceUnavailableCalculator( TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, model_t::E_IndividualLowCountsByBucketAndPerson, model, 0 /*time*/, 15.0 /*value*/, 1.0 /*count*/, - 0.001 /*probability*/, TTail2Vec{maths_t::E_RightTail}, - I, influencerValues, influences); + 0.001 /*probability*/, {maths_t::E_RightTail}, I, + influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); } - /*{ + { LOG_DEBUG(<< "Test correlated"); model::CInfluenceUnavailableCalculator calculator; maths::CTimeSeriesDecomposition trend{0.0, 600}; maths::CMultivariateNormalConjugate<2> prior{ - maths::CMultivariateNormalConjugate<2>::nonInformativePrior(maths_t::E_ContinuousData, 0.0)}; - maths::CMultivariateTimeSeriesModel model{params(600), 0, trend, prior}; + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; TDoubleVec samples_; rng.generateNormalSamples(10.0, 1.0, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(TDouble10Vec(2, samples_[i])); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(2, samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); core_t::TTime times[] = {0, 0}; - double values[] = {15.0, 15.0}; - double counts[] = {1.0, 1.0}; + double values[]{15.0, 15.0}; + double counts[]{1.0, 1.0}; TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; influencerValues.emplace_back(TStrCRef(i1), make_pair(11.0, 11.0, 1.0, 1.0)); influencerValues.emplace_back(TStrCRef(i2), make_pair(11.0, 11.0, 1.0, 1.0)); influencerValues.emplace_back(TStrCRef(i3), make_pair(15.0, 15.0, 1.0, 1.0)); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualLowCountsByBucketAndPerson, model, - times, values, TDouble10Vec4Vec1Vec{TDouble10Vec4Vec{TDouble10Vec{1.0}}}, counts, - 0.1probability, maths_t::E_RightTail, 0, I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualLowCountsByBucketAndPerson, + model, times, values, counts, 0.1 /*probability*/, + TTail2Vec(2, maths_t::E_RightTail), I, + influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT(influences.empty()); - }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluenceCalculator() { @@ -378,8 +358,6 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen test::CRandomNumbers rng; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); - model::CLogProbabilityComplementInfluenceCalculator 
calculator; core_t::TTime bucketLength{600}; @@ -497,16 +475,19 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen } } } - /*{ + { LOG_DEBUG(<< "Test correlated"); - double counts[] = {1.0, 1.0}; + double counts[]{1.0, 1.0}; { LOG_DEBUG(<< "One influencer value"); - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; TDoubleVec mean(2, 10.0); TDoubleVecVec covariances(2, TDoubleVec(2)); @@ -514,44 +495,38 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen covariances[0][1] = covariances[1][0] = 4.0; TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(samples_[i]); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); - core_t::TTime times[] = {0, 0}; - double values[] = {15.0, 15.0}; - double vs[] = {1.0, 1.0}; + core_t::TTime times[]{0, 0}; + double values[]{15.0, 15.0}; double lb, ub; TTail10Vec tail; - TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(&vs[0], &vs[2]))); - prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weight, - lb, ub, tail); + prior.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, {TDouble10Vec(&values[0], &values[2])}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(15.0, 15.0, 1.0, 1.0))); + influencerValues.push_back({TStrCRef(i1), make_pair(15.0, 15.0, 1.0, 1.0)}); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, - times, values, weight, counts, - 0.5*(lb+ub), tail, 0, 0.0confidence, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, times, values, counts, 0.5 * (lb + ub), + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), + core::CContainerPrinter::print(influences)); } - { + /*{ LOG_DEBUG(<< "No trend"); - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; TDoubleVec mean(2, 10.0); TDoubleVecVec 
covariances(2, TDoubleVec(2)); @@ -559,44 +534,31 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen covariances[0][1] = covariances[1][0] = 4.0; TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(samples_[i]); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); - - core_t::TTime times[] = {0, 0}; - double values[] = {20.0, 10.0}; - double vs[] = {1.0, 1.0}; - TSize10Vec coordinates(std::size_t(1), 0); - TDouble10Vec2Vec lbs, ubs; + + core_t::TTime times[]{0, 0}; + double values[]{20.0, 10.0}; + double lb, ub; TTail10Vec tail; - TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); - TDouble10Vec4Vec1Vec weight(1, TDouble10Vec4Vec(1, TDouble10Vec(&vs[0], &vs[2]))); - prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyle, - sample, - weight, - coordinates, - lbs, ubs, tail); - double lb = std::sqrt(lbs[0][0] * lbs[1][0]); - double ub = std::sqrt(ubs[0][0] * ubs[1][0]); + prior.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, {TDouble10Vec(&values[0], &values[2])}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair( 1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair( 1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i3), make_pair(18.0, 8.0, 1.0, 1.0))); + influencerValues.push_back({TStrCRef(i1), make_pair(1.0, 1.0, 1.0, 1.0)}); + influencerValues.push_back({TStrCRef(i2), make_pair(1.0, 1.0, 1.0, 1.0)}); + influencerValues.push_back({TStrCRef(i3), make_pair(18.0, 8.0, 1.0, 1.0)}); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualCountByBucketAndPerson, TDecompositionCPtr1Vec(), *prior, - times, values, weight, counts, - 0.5*(lb+ub), tail, coordinates[0], 0.0confidence, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualCountByBucketAndPerson, + model, times, values, counts, 0.5 * (lb + ub), + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); - CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), core::CContainerPrinter::print(influences)); + CPPUNIT_ASSERT_EQUAL(std::string("[((I, i3), 1)]"), + core::CContainerPrinter::print(influences)); } { LOG_DEBUG(<< "Trend"); @@ -694,8 +656,8 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityComplementInfluen CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedInfluences[i][j], influences[j].second, 0.05); } } - } - }*/ + }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { @@ -838,16 +800,20 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { } } } - /*{ + { LOG_DEBUG(<< "Test correlated"); - core_t::TTime times[] = {0, 0}; + core_t::TTime times[]{0, 0}; { LOG_DEBUG(<< "One influencer value"); - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, 
maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; + { TDoubleVec mean(2, 10.0); TDoubleVecVec covariances(2, TDoubleVec(2)); @@ -855,41 +821,33 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { covariances[0][1] = covariances[1][0] = 4.0; TDoubleVecVec samples_; rng.generateMultivariateNormalSamples(mean, covariances, 50, samples_); - TDouble10Vec1Vec samples; - for (std::size_t i = 0u; i < samples_.size(); ++i) - { - samples.push_back(samples_[i]); + for (std::size_t i = 0u; i < samples_.size(); ++i) { + prior.addSamples({TDouble10Vec(samples_[i])}, + maths_t::CUnitWeights::singleUnit(2)); } - TDouble10Vec4Vec1Vec weights(samples.size(), TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))); - prior->addSamples(COUNT_WEIGHT, samples, weights); } - double values[] = {5.0, 5.0}; - double counts[] = {1.0, 1.0}; + double values[]{5.0, 5.0}; + double counts[]{1.0, 1.0}; double lb, ub; TTail10Vec tail; - TDouble10Vec1Vec sample(1, TDouble10Vec(&values[0], &values[2])); - TDouble10Vec4Vec1Vec weights(1, TDouble10Vec4Vec(2, TDouble10Vec(2, 1.0))); - prior->probabilityOfLessLikelySamples(maths_t::E_TwoSided, - weightStyles, - sample, - weights, - lb, ub, tail); + prior.probabilityOfLessLikelySamples( + maths_t::E_TwoSided, {TDouble10Vec(&values[0], &values[2])}, + maths_t::CUnitWeights::singleUnit(2), lb, ub, tail); TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(5.0, 5.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i1), make_pair(5.0, 5.0, 1.0, 1.0))); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualMeanByPerson, TDecompositionCPtr1Vec(), *prior, - times, values, weights, counts, - 0.5*(lb+ub), tail, 0, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualMeanByPerson, + model, times, values, counts, 0.5 * (lb + ub), + tail, I, influencerValues, influences); LOG_DEBUG(<< " influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1)]"), core::CContainerPrinter::print(influences)); } - { + /*{ LOG_DEBUG(<< "No trend"); maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = @@ -1044,8 +1002,8 @@ void CProbabilityAndInfluenceCalculatorTest::testMeanInfluenceCalculator() { CPPUNIT_ASSERT_EQUAL(i3, *influences[1].first.second); CPPUNIT_ASSERT_DOUBLES_EQUAL(0.6, influences[1].second, 0.08); } - } - }*/ + }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculator() { @@ -1056,7 +1014,6 @@ void CProbabilityAndInfluenceCalculatorTest::testLogProbabilityInfluenceCalculat model::CLogProbabilityInfluenceCalculator calculator; core_t::TTime bucketLength{600}; - maths_t::TWeightStyleVec weightStyle(1, maths_t::E_SampleSeasonalVarianceScaleWeight); { LOG_DEBUG(<< "Test univariate"); @@ -1402,41 +1359,45 @@ void CProbabilityAndInfluenceCalculatorTest::testIndicatorInfluenceCalculator() TStoredStringPtrStoredStringPtrPrDoublePrVec influences; computeInfluences(calculator, model_t::E_IndividualIndicatorOfBucketPerson, - model, 0 /*time*/, 1.0 /*value*/, 1.0 /*count*/, - 0.1 /*probability*/, TTail2Vec{maths_t::E_RightTail}, - I, 
influencerValues, influences); + model, 0 /*time*/, 1.0 /*value*/, 1.0 /*count*/, 0.1 /*probability*/, + {maths_t::E_RightTail}, I, influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); } - /*{ + { LOG_DEBUG(<< "Test correlated"); model::CIndicatorInfluenceCalculator calculator; - maths::CMultivariateNormalConjugateFactory::TPriorPtr prior = - maths::CMultivariateNormalConjugateFactory::nonInformative(2, maths_t::E_ContinuousData, 0.0); + maths::CTimeSeriesDecomposition trend{0.0, 600}; + maths::CMultivariateNormalConjugate<2> prior{ + maths::CMultivariateNormalConjugate<2>::nonInformativePrior( + maths_t::E_ContinuousData, 0.0)}; + maths::CMultivariateTimeSeriesModel model{params(600), trend, prior}; - core_t::TTime times[] = {0, 0}; - double values[] = {1.0, 1.0}; - double counts[] = {1.0, 1.0}; + core_t::TTime times[]{0, 0}; + double values[]{1.0, 1.0}; + double counts[]{1.0, 1.0}; TStrCRefDouble1VecDouble1VecPrPrVec influencerValues; - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i1), make_pair(1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i2), make_pair(1.0, 1.0, 1.0, 1.0))); - influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr(TStrCRef(i3), make_pair(1.0, 1.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i1), make_pair(1.0, 1.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i2), make_pair(1.0, 1.0, 1.0, 1.0))); + influencerValues.push_back(TStrCRefDouble1VecDouble1VecPrPr( + TStrCRef(i3), make_pair(1.0, 1.0, 1.0, 1.0))); TStoredStringPtrStoredStringPtrPrDoublePrVec influences; - computeInfluences(calculator, - model_t::E_IndividualIndicatorOfBucketPerson, TDecompositionCPtr1Vec(), *prior, - times, values, TDouble10Vec4Vec1Vec(1, TDouble10Vec4Vec(1, TDouble10Vec(2, 1.0))), counts, - 0.1probability, maths_t::E_RightTail, 0, - I, influencerValues, influences); + computeInfluences(calculator, model_t::E_IndividualIndicatorOfBucketPerson, + model, times, values, counts, 0.1 /*probability*/, + TTail2Vec(2, maths_t::E_RightTail), I, + influencerValues, influences); LOG_DEBUG(<< "influences = " << core::CContainerPrinter::print(influences)); CPPUNIT_ASSERT_EQUAL(std::string("[((I, i1), 1), ((I, i2), 1), ((I, i3), 1)]"), core::CContainerPrinter::print(influences)); - }*/ + } } void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculator() { @@ -1469,9 +1430,6 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat model_t::E_IndividualMeanLatLongByPerson}; const maths::CModel* models[]{&univariateModel, &multivariateModel}; - maths_t::TWeightStyleVec weightStyles; - weightStyles.push_back(maths_t::E_SampleSeasonalVarianceScaleWeight); - weightStyles.push_back(maths_t::E_SampleCountVarianceScaleWeight); model::CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); { @@ -1505,13 +1463,14 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat for (std::size_t j = 0u; j < features.size(); ++j) { TDouble2Vec1Vec value{TDouble2Vec(&values[i + 5 * j][0], &values[i + 5 * j][1 + j])}; - TDouble2Vec4Vec weights{TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), - TDouble2Vec(1 + j, 1.0)}; + maths_t::TDouble2VecWeightsAry weights( + maths_t::CUnitWeights::unit(1 + j)); + 
maths_t::setSeasonalVarianceScale( + TDouble2Vec(1 + j, values[i + 5 * j][1 + j]), weights); maths::CModelProbabilityParams params_; params_.addCalculation(maths_t::E_TwoSided) .seasonalConfidenceInterval(0.0) .addBucketEmpty(TBool2Vec{false}) - .weightStyles(weightStyles) .addWeights(weights); double p; TTail2Vec tail; @@ -1557,25 +1516,19 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat LOG_DEBUG(<< "influencing joint probability"); TDoubleVecVec values[]{ - TDoubleVecVec{{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, - TDoubleVecVec{{12.0, 17.0, 1.0}, - {15.0, 20.0, 1.0}, - {7.0, 12.0, 1.5}, - {9.0, 14.0, 1.0}, - {17.0, 22.0, 2.0}}}; + {{12.0, 1.0}, {15.0, 1.0}, {7.0, 1.5}, {9.0, 1.0}, {17.0, 2.0}}, + {{12.0, 17.0, 1.0}, {15.0, 20.0, 1.0}, {7.0, 12.0, 1.5}, {9.0, 14.0, 1.0}, {17.0, 22.0, 2.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 2.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; + {{{TStrCRef(i2), make_pair(12.0, 1.0)}}, + {{TStrCRef(i1), make_pair(15.0, 1.0)}}, + {{TStrCRef(i2), make_pair(7.0, 1.5)}}, + {{TStrCRef(i2), make_pair(9.0, 1.0)}}, + {{TStrCRef(i1), make_pair(17.0, 2.0)}}}, + {{{TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}, + {{TStrCRef(i1), make_pair(15.0, 20.0, 1.0)}}, + {{TStrCRef(i2), make_pair(7.0, 12.0, 1.5)}}, + {{TStrCRef(i2), make_pair(9.0, 14.0, 1.0)}}, + {{TStrCRef(i1), make_pair(17.0, 22.0, 2.0)}}}}; for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; testProbabilityAndGetInfluences(features[i], *models[i], now, values[i], @@ -1590,25 +1543,19 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat LOG_DEBUG(<< "influencing extreme probability"); TDoubleVecVec values[]{ - TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, - {10.5, 15.5, 1.0}, - {8.5, 13.5, 1.5}, - {10.8, 15.8, 1.5}, - {19.0, 24.0, 1.0}}}; + {{11.0, 1.0}, {10.5, 1.0}, {8.5, 1.5}, {10.8, 1.5}, {19.0, 1.0}}, + {{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.5, 13.5, 1.5}, {10.8, 15.8, 1.5}, {19.0, 24.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, - 
TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; + {{{TStrCRef(i1), make_pair(11.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 1.0)}}, + {{TStrCRef(i1), make_pair(8.5, 1.0)}}, + {{TStrCRef(i1), make_pair(10.8, 1.0)}}, + {{TStrCRef(i2), make_pair(19.0, 1.0)}}}, + {{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}}, + {{TStrCRef(i1), make_pair(8.5, 13.5, 1.0)}}, + {{TStrCRef(i1), make_pair(10.8, 15.8, 1.0)}}, + {{TStrCRef(i2), make_pair(19.0, 24.0, 1.0)}}}}; for (std::size_t i = 0u; i < features.size(); ++i) { TStoredStringPtrStoredStringPtrPrDoublePrVec influences; @@ -1624,36 +1571,24 @@ void CProbabilityAndInfluenceCalculatorTest::testProbabilityAndInfluenceCalculat LOG_DEBUG(<< "marginal influence"); TDoubleVecVec values[]{ - TDoubleVecVec{{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, - TDoubleVecVec{{11.0, 16.0, 1.0}, - {10.5, 15.5, 1.0}, - {8.0, 13.0, 1.0}, - {10.8, 15.8, 1.0}, - {14.0, 19.0, 1.0}}}; + {{11.0, 1.0}, {10.5, 1.0}, {8.0, 1.0}, {10.8, 1.0}, {14.0, 1.0}}, + {{11.0, 16.0, 1.0}, {10.5, 15.5, 1.0}, {8.0, 13.0, 1.0}, {10.8, 15.8, 1.0}, {14.0, 19.0, 1.0}}}; TStrCRefDouble1VecDoublePrPrVecVec influencerValues[]{ - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 1.0)}, - {TStrCRef(i2), make_pair(10.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 1.0)}, - {TStrCRef(i2), make_pair(10.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 1.0)}, - {TStrCRef(i2), make_pair(7.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 1.0)}, - {TStrCRef(i2), make_pair(10.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(16.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 1.0)}}}, - TStrCRefDouble1VecDoublePrPrVecVec{ - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, - {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, - {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, - {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, - {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, - TStrCRefDouble1VecDoublePrPrVec{ - {TStrCRef(i1), make_pair(16.0, 21.0, 1.0)}, - {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; + {{{TStrCRef(i1), make_pair(12.0, 1.0)}, {TStrCRef(i2), make_pair(10.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 1.0)}, {TStrCRef(i2), make_pair(10.5, 1.0)}}, + {{TStrCRef(i1), make_pair(9.0, 1.0)}, {TStrCRef(i2), make_pair(7.0, 1.0)}}, + {{TStrCRef(i1), make_pair(11.0, 1.0)}, {TStrCRef(i2), make_pair(10.6, 1.0)}}, + {{TStrCRef(i1), make_pair(16.0, 1.0)}, {TStrCRef(i2), make_pair(12.0, 1.0)}}}, + {{{TStrCRef(i1), make_pair(12.0, 17.0, 1.0)}, + {TStrCRef(i2), make_pair(10.0, 15.0, 1.0)}}, + {{TStrCRef(i1), make_pair(10.5, 15.5, 1.0)}, + {TStrCRef(i2), make_pair(10.5, 15.5, 1.0)}}, + {{TStrCRef(i1), make_pair(9.0, 14.0, 1.0)}, + {TStrCRef(i2), make_pair(7.0, 12.0, 1.0)}}, + {{TStrCRef(i1), make_pair(11.0, 16.0, 1.0)}, + {TStrCRef(i2), make_pair(10.6, 15.6, 1.0)}}, + {{TStrCRef(i1), make_pair(16.0, 21.0, 
1.0)}, + {TStrCRef(i2), make_pair(12.0, 17.0, 1.0)}}}}; { TStoredStringPtrStoredStringPtrPrDoublePrVec influences;
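Illustrative sketch, not part of the patch: the hunks above all replace style-indexed weight vectors (a maths_t::TWeightStyleVec plus parallel TDouble4Vec/TDouble2Vec4Vec entries) with a typed weights array filled through named setters. A minimal example of the new pattern, using only helpers that appear in this diff (maths_t::CUnitWeights::unit, maths_t::setCount, maths_t::setSeasonalVarianceScale, maths_t::setCountVarianceScale, declared in this repository's maths_t namespace); the numeric scales are made-up placeholder values.

    // Sketch only: one weights entry per sample, starting from unit weights
    // for a one-dimensional value.
    maths_t::TDouble2VecWeightsAry weight(maths_t::CUnitWeights::unit(1));
    // Each weight kind now has a named setter instead of a slot located by
    // searching weightStyles() for the matching maths_t::ESampleWeightStyle.
    maths_t::setCount(TDouble2Vec{2.0}, weight);
    maths_t::setSeasonalVarianceScale(TDouble2Vec{1.5}, weight);
    maths_t::setCountVarianceScale(TDouble2Vec{1.2}, weight);
    // In-place adjustments use the multiply* helpers, e.g.
    // maths_t::multiplyCountVarianceScale(TDouble2Vec{2.0}, params.weights()[0]);
    // and the filled array is passed straight to addWeights()/trendWeights()/
    // priorWeights(), with no weightStyles() call.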