Skip to content

Commit

Permalink
Merge pull request cms-sw#81 from gpetruc/vectorizing_multipdf
Browse files Browse the repository at this point in the history
Bugfix in recursive RooAddPdf splitting (needed for Hgg), and vectorizing multipdf (off by default)
Merged after validation on full H->gg card: 20 point scan of mu, deltaNLL values within 10^-5 from results in master (often much closer), time reduced by a factor 3 with ADDNLL_MULTINLL=1
  • Loading branch information
gpetruc committed Jan 21, 2014
2 parents 572ddb7 + ccddd75 commit 89a5448
Show file tree
Hide file tree
Showing 9 changed files with 379 additions and 30 deletions.
39 changes: 39 additions & 0 deletions interface/CachingMultiPdf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef CachingMultiPdf_h
#define CachingMultiPdf_h

#include "../interface/RooMultiPdf.h"
#include "../interface/CachingNLL.h"
#include <RooAbsData.h>
#include <RooAddPdf.h>
#include <vector>

namespace cacheutils {
class CachingMultiPdf : public CachingPdfBase {
public:
CachingMultiPdf(const RooMultiPdf &pdf, const RooArgSet &obs) ;
~CachingMultiPdf() ;
virtual const std::vector<Double_t> & eval(const RooAbsData &data) ;
const RooAbsReal *pdf() const { return pdf_; }
virtual void setDataDirty() ;
protected:
const RooMultiPdf * pdf_;
boost::ptr_vector<CachingPdfBase> cachingPdfs_;
};

class CachingAddPdf : public CachingPdfBase {
public:
CachingAddPdf(const RooAddPdf &pdf, const RooArgSet &obs) ;
~CachingAddPdf() ;
virtual const std::vector<Double_t> & eval(const RooAbsData &data) ;
const RooAbsReal *pdf() const { return pdf_; }
virtual void setDataDirty() ;
protected:
const RooAddPdf * pdf_;
std::vector<const RooAbsReal *> coeffs_;
boost::ptr_vector<CachingPdfBase> cachingPdfs_;
std::vector<Double_t> work_;
};

} // namespace

#endif
21 changes: 16 additions & 5 deletions interface/CachingNLL.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ namespace cacheutils {
private:
std::vector<RooRealVar *> vars_;
std::vector<double> vals_;
std::vector<RooCategory *> cats_;
std::vector<int> states_;
};

// Part zero point five: Cache of pdf values for different parameters
Expand Down Expand Up @@ -56,14 +58,22 @@ namespace cacheutils {
Item *items[MaxItems_];
};
// Part one: cache all values of a pdf
class CachingPdf {
class CachingPdfBase {
public:
CachingPdfBase() {}
virtual ~CachingPdfBase() {}
virtual const std::vector<Double_t> & eval(const RooAbsData &data) = 0;
virtual const RooAbsReal *pdf() const = 0;
virtual void setDataDirty() = 0;
};
class CachingPdf : public CachingPdfBase {
public:
CachingPdf(RooAbsReal *pdf, const RooArgSet *obs) ;
CachingPdf(const CachingPdf &other) ;
virtual ~CachingPdf() ;
const std::vector<Double_t> & eval(const RooAbsData &data) ;
virtual const std::vector<Double_t> & eval(const RooAbsData &data) ;
const RooAbsReal *pdf() const { return pdf_; }
void setDataDirty() { lastData_ = 0; }
virtual void setDataDirty() { lastData_ = 0; }
protected:
const RooArgSet *obs_;
RooAbsReal *pdfOriginal_;
Expand Down Expand Up @@ -91,6 +101,7 @@ class OptimizedCachingPdfT : public CachingPdf {
VPdfT *vpdf_;
};

CachingPdfBase * makeCachingPdf(RooAbsReal *pdf, const RooArgSet *obs) ;

class CachingAddNLL : public RooAbsReal {
public:
Expand Down Expand Up @@ -118,10 +129,10 @@ class CachingAddNLL : public RooAbsReal {
std::vector<Double_t> weights_;
double sumWeights_;
mutable std::vector<RooAbsReal*> coeffs_;
mutable boost::ptr_vector<CachingPdf> pdfs_;
mutable boost::ptr_vector<CachingPdfBase> pdfs_;
mutable boost::ptr_vector<RooAbsReal> prods_;
mutable std::vector<RooAbsReal*> integrals_;
mutable std::vector<std::pair<const RooMultiPdf*,CachingPdf*> > multiPdfs_;
mutable std::vector<std::pair<const RooMultiPdf*,CachingPdfBase*> > multiPdfs_;
mutable std::vector<Double_t> partialSum_;
mutable std::vector<Double_t> workingArea_;
mutable bool isRooRealSum_, fastExit_;
Expand Down
2 changes: 2 additions & 0 deletions interface/HGGRooPdfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class RooPower : public RooAbsPdf {

Int_t getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* rangeName=0) const ;
Double_t analyticalIntegral(Int_t code, const char* rangeName=0) const ;
const RooAbsReal & base() const { return x.arg(); }
const RooAbsReal & exponent() const { return c.arg(); }

protected:
RooRealProxy x;
Expand Down
31 changes: 31 additions & 0 deletions interface/VectorizedSimplePdfs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef VectorizedExponential_h
#define VectorizedExponential_h

#include <RooExponential.h>
#include <RooAbsData.h>
#include "../interface/HGGRooPdfs.h"
#include <vector>

class VectorizedExponential {
public:
VectorizedExponential(const RooExponential &pdf, const RooAbsData &data) ;
void fill(std::vector<Double_t> &out) const ;
private:
const RooRealVar * x_;
const RooAbsReal * lambda_;
std::vector<Double_t> xvals_;
mutable std::vector<Double_t> work_;
};

class VectorizedPower {
public:
VectorizedPower(const RooPower &pdf, const RooAbsData &data) ;
void fill(std::vector<Double_t> &out) const ;
private:
const RooRealVar * x_;
const RooAbsReal * exponent_;
std::vector<Double_t> xvals_;
mutable std::vector<Double_t> work_;
};

#endif
116 changes: 116 additions & 0 deletions src/CachingMultiPdf.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#include "../interface/CachingMultiPdf.h"
#include "vectorized.h"

// Uncomment do do regression testing wrt uncached multipdf
//#define CachingMultiPdf_VALIDATE

cacheutils::CachingMultiPdf::CachingMultiPdf(const RooMultiPdf &pdf, const RooArgSet &obs) :
pdf_(&pdf)
{
//std::cout << "Making a CachingMultiPdf for " << pdf.GetName() << " with " << pdf_->getNumPdfs() << " pdfs." << std::endl;
for (int i = 0, n = pdf_->getNumPdfs(); i < n; ++i) {
cachingPdfs_.push_back(makeCachingPdf(pdf_->getPdf(i), &obs));
//cachingPdfs_.push_back(new CachingPdf(pdf_->getPdf(i), &obs));
//std::cout << " MultiPdfAdding " << pdf.GetName() << "[" << i << "]: " << pdf_->getPdf(i)->ClassName() << " " << pdf_->getPdf(i)->GetName() << " using " << typeid(cachingPdfs_.back()).name() << std::endl;
}
#ifdef CachingMultiPdf_VALIDATE
cachingPdfs_.push_back(new CachingPdf((RooAbsPdf*)&pdf,&obs));
#endif

}

cacheutils::CachingMultiPdf::~CachingMultiPdf()
{
}

const std::vector<Double_t> & cacheutils::CachingMultiPdf::eval(const RooAbsData &data)
{
const std::vector<Double_t> & ret = cachingPdfs_[pdf_->getCurrentIndex()].eval(data);
#ifdef CachingMultiPdf_VALIDATE
cachingPdfs_.back().setDataDirty();
const std::vector<Double_t> & chk = cachingPdfs_.back().eval(data);
double normratio = 0.0;
for (unsigned int i = 0, n = ret.size(); i < n; ++i) {
if (chk[i] > 1e-6) normratio = max(normratio, abs(ret[i]/chk[i]-1));
}
printf("%-70s: normalization match %+13.10f, selected pdf %s\n", pdf_->GetName(), normratio, pdf_->getCurrentPdf()->ClassName()); fflush(stdout);
if (normratio > 1e-5 && typeid(*pdf_->getCurrentPdf()) == typeid(RooAddPdf) ) {
RooAddPdf *add = (RooAddPdf*)pdf_->getCurrentPdf();
for (unsigned int i = 0, n = add->pdfList().getSize(); i < n; ++i) {
printf("%-70s: RooAdd[%d] is a %s\n", pdf_->GetName(), i, add->pdfList().at(i)->ClassName());
}
}
if (normratio > 1e-5) {
double sum = 0.0, sumc = 0;
std::cout << "Before normalization " << std::endl;
for (unsigned int i = 0, n = ret.size(); i < n; ++i) {
printf("%s[%6d] %18.10f %18.10f %+13.10f %+13.10f\n", pdf_->GetName(), i, ret[i], chk[i], ret[i]-chk[i], ret[i]/chk[i]-1);
sum += ret[i]; sumc += chk[i];
}
printf("%s[%6s] %18.10f %18.10f %+13.10f %+13.10f\n", pdf_->GetName(), "TOTAL", sum, sumc, sum-sumc, sum/sumc-1);
}
return chk;
#endif
return ret;
}

void cacheutils::CachingMultiPdf::setDataDirty()
{
for (CachingPdfBase &pdf : cachingPdfs_) {
pdf.setDataDirty();
}
}

cacheutils::CachingAddPdf::CachingAddPdf(const RooAddPdf &pdf, const RooArgSet &obs) :
pdf_(&pdf)
{
const RooArgList & pdfs = pdf.pdfList();
const RooArgList & coeffs = pdf.coefList();
//std::cout << "Making a CachingAddPdf for " << pdf.GetName() << " with " << pdfs.getSize() << " pdfs, " << coeffs.getSize() << " coeffs." << std::endl;
for (int i = 0, n = pdfs.getSize(); i < n; ++i) {
RooAbsPdf *pdfi = (RooAbsPdf*) pdfs.at(i);
cachingPdfs_.push_back(makeCachingPdf(pdfi, &obs));
//std::cout << " AddPdfAdding " << pdf.GetName() << "[" << i << "]: " << pdfi->ClassName() << " " << pdfi->GetName() << " using " << typeid(cachingPdfs_.back()).name() << std::endl;
}
for (int i = 0, n = coeffs.getSize(); i < n; ++i) {
coeffs_.push_back((RooAbsReal*)coeffs.at(i));
}
}

cacheutils::CachingAddPdf::~CachingAddPdf()
{
}

const std::vector<Double_t> & cacheutils::CachingAddPdf::eval(const RooAbsData &data)
{
double coefSum = 0.0;
std::vector<double> coefVals(cachingPdfs_.size());
for (int i = 0, n = coeffs_.size(); i < n; ++i) {
coefVals[i] = coeffs_[i]->getVal();
coefSum += coefVals[i];
}
if (coeffs_.size() != coefVals.size()) {
coefVals.back() = 1.0 - coefSum;
} else if (coefSum != 1.0) {
for (int i = 0, n = coeffs_.size(); i < n; ++i) {
coefVals[i] /= coefSum;
}
}
const std::vector<Double_t> & one = cachingPdfs_.front().eval(data);
unsigned int size = one.size();
work_.resize(size);
std::fill(work_.begin(), work_.end(), 0.0);
for (int i = 0, n = coefVals.size(); i < n; ++i) {
vectorized::mul_add(size, coefVals[i], &(i ? cachingPdfs_[i].eval(data) : one)[0], &work_[0]);
}
//std::cout << "Evaluated " << pdf_->GetName() << " of type CachingAddPdf" << std::endl;
return work_;
}

void cacheutils::CachingAddPdf::setDataDirty()
{
for (CachingPdfBase &pdf : cachingPdfs_) {
pdf.setDataDirty();
}
}

Loading

0 comments on commit 89a5448

Please sign in to comment.