Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RF] Don't skip zero-weight events by default in new BatchMode #11134

Merged
merged 2 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions roofit/roofitcore/res/RooFit/BatchModeDataHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ class TNamed;
namespace RooFit {
namespace BatchModeDataHelpers {

std::map<RooFit::Detail::DataKey, RooSpan<const double>> getDataSpans(RooAbsData const &data,
std::string_view rangeName,
RooAbsCategory const *indexCat,
std::stack<std::vector<double>> &buffers);
std::map<RooFit::Detail::DataKey, RooSpan<const double>>
getDataSpans(RooAbsData const &data, std::string_view rangeName, RooAbsCategory const *indexCat,
std::stack<std::vector<double>> &buffers, bool skipZeroWeights);

} // namespace BatchModeDataHelpers
} // namespace RooFit
Expand Down
2 changes: 1 addition & 1 deletion roofit/roofitcore/res/RooFitDriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class RooFitDriver {
RooFit::BatchModeOption batchMode = RooFit::BatchModeOption::Cpu);

void setData(RooAbsData const &data, std::string_view rangeName = "",
RooAbsCategory const *indexCatForSplitting = nullptr);
RooAbsCategory const *indexCatForSplitting = nullptr, bool skipZeroWeights = false);
void setData(DataSpansMap const &dataSpans);

~RooFitDriver();
Expand Down
51 changes: 28 additions & 23 deletions roofit/roofitcore/src/BatchModeDataHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@

namespace {

void splitByCategory(std::map<RooFit::Detail::DataKey, RooSpan<const double>> &dataSpans, RooAbsCategory const &category,
std::stack<std::vector<double>> &buffers)
void splitByCategory(std::map<RooFit::Detail::DataKey, RooSpan<const double>> &dataSpans,
RooAbsCategory const &category, std::stack<std::vector<double>> &buffers)
{
std::stack<std::vector<double>> oldBuffers;
std::swap(buffers, oldBuffers);
Expand Down Expand Up @@ -71,28 +71,33 @@ void splitByCategory(std::map<RooFit::Detail::DataKey, RooSpan<const double>> &d
} // namespace

////////////////////////////////////////////////////////////////////////////////
// Extract all content from a RooFit datasets as a map of spans.
// Spans with the weights and squared weights will be also stored in the map,
// keyed with the names `_weight` and the `_weight_sumW2`. If the dataset is
// unweighted, these weight spans will only contain the single value `1.0`.
// Entries with zero weight will be skipped.
//
// \return A `std::map` with spans keyed to name pointers.
// \param[in] data The input dataset.
// \param[in] rangeName Select only entries from the data in a given range
// (empty string for no range).
// \param[in] indexCat If not `nullptr`, each span is spit up by this category,
// with the new names prefixed by the category component name
// surrounded by underscores. For example, if you have a category
// with `signal` and `control` samples, the span for a variable `x`
// will be split in two spans `_signal_x` and `_control_x`.
// \param[in] buffers Pass here an empty stack of `double` vectors, which will
// be used as memory for the data if the memory in the dataset
// object can't be used directly (e.g. because you used the range
// selection or the splitting by categories).
/// Extract all content from a RooFit datasets as a map of spans.
/// Spans with the weights and squared weights will be also stored in the map,
/// keyed with the names `_weight` and the `_weight_sumW2`. If the dataset is
/// unweighted, these weight spans will only contain the single value `1.0`.
/// Entries with zero weight will be skipped.
///
/// \return A `std::map` with spans keyed to name pointers.
/// \param[in] data The input dataset.
/// \param[in] rangeName Select only entries from the data in a given range
/// (empty string for no range).
/// \param[in] indexCat If not `nullptr`, each span is spit up by this category,
/// with the new names prefixed by the category component name
/// surrounded by underscores. For example, if you have a category
/// with `signal` and `control` samples, the span for a variable `x`
/// will be split in two spans `_signal_x` and `_control_x`.
/// \param[in] buffers Pass here an empty stack of `double` vectors, which will
/// be used as memory for the data if the memory in the dataset
/// object can't be used directly (e.g. because you used the range
/// selection or the splitting by categories).
/// \param[in] skipZeroWeights Skip entries with zero weight when filling the
/// data spans. Be very careful with enabling it, because the user
/// might not expect that the batch results are not aligned with the
/// original dataset anymore!
std::map<RooFit::Detail::DataKey, RooSpan<const double>>
RooFit::BatchModeDataHelpers::getDataSpans(RooAbsData const &data, std::string_view rangeName,
RooAbsCategory const *indexCat, std::stack<std::vector<double>> &buffers)
RooAbsCategory const *indexCat, std::stack<std::vector<double>> &buffers,
bool skipZeroWeights)
{
std::map<RooFit::Detail::DataKey, RooSpan<const double>> dataSpans; // output variable

Expand Down Expand Up @@ -137,7 +142,7 @@ RooFit::BatchModeDataHelpers::getDataSpans(RooAbsData const &data, std::string_v
buffer.reserve(nEvents);
bufferSumW2.reserve(nEvents);
for (std::size_t i = 0; i < nEvents; ++i) {
if (weight[i] != 0) {
if (!skipZeroWeights || weight[i] != 0) {
buffer.push_back(weight[i]);
bufferSumW2.push_back(weightSumW2[i]);
++nNonZeroWeight;
Expand Down
16 changes: 8 additions & 8 deletions roofit/roofitcore/src/BatchModeHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,17 @@ RooFit::BatchModeHelpers::createNLL(RooAbsPdf &pdf, RooAbsData &data, std::uniqu
nll->addOwnedComponents(std::move(binSamplingPdfs));
nll->addOwnedComponents(std::move(nllTerms));

RooAbsCategory const *indexCatForSplitting = nullptr;
if (auto simPdf = dynamic_cast<RooSimultaneous *>(&finalPdf)) {
RooArgSet parameters;
pdf.getParameters(data.get(), parameters);
nll->recursiveRedirectServers(parameters);
driver = std::make_unique<RooFitDriver>(*nll, observables, batchMode);
driver->setData(data, rangeName, &simPdf->indexCat());
} else {
driver = std::make_unique<RooFitDriver>(*nll, observables, batchMode);
driver->setData(data, rangeName);
indexCatForSplitting = &simPdf->indexCat();
}

driver = std::make_unique<RooFitDriver>(*nll, observables, batchMode);
driver->setData(data, rangeName, indexCatForSplitting, /*skipZeroWeights=*/true);

// Set the fitrange attribute so that RooPlot can automatically plot the fitting range by default
if (!rangeName.empty()) {

Expand Down Expand Up @@ -219,11 +219,11 @@ class RooAbsRealWrapper final : public RooAbsReal {

double defaultErrorLevel() const override { return _driver->topNode().defaultErrorLevel(); }

bool getParameters(const RooArgSet * observables, RooArgSet &outputSet,
bool /*stripDisconnected=true*/) const override
bool
getParameters(const RooArgSet *observables, RooArgSet &outputSet, bool /*stripDisconnected=true*/) const override
{
outputSet.add(_parameters);
if(observables) {
if (observables) {
outputSet.remove(*observables);
}
return false;
Expand Down
10 changes: 7 additions & 3 deletions roofit/roofitcore/src/NormalizationHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ void treeNodeServerListAndNormSets(const RooAbsArg &arg, RooAbsCollection &list,
}

auto differentSet = arg.fillNormSetForServer(normSet, *server);
if (differentSet)
if (differentSet) {
differentSet->sort();
}

auto &serverNormSet = differentSet ? *differentSet : normSet;

Expand Down Expand Up @@ -219,14 +220,17 @@ std::vector<std::unique_ptr<RooAbsArg>> unfoldIntegrals(RooAbsArg const &topNode
treeNodeServerListAndNormSets(topNode, nodes, normSetSorted, normSets, checker);

// Clean normsets of the variables that the arg does not depend on
// std::unordered_map<std::pair<RooAbsArg const*,RooAbsArg const*>,bool> dependsResults;
for (auto &item : normSets) {
if (!item.second || item.second->empty())
continue;
auto actualNormSet = new RooArgSet{};
for (auto *narg : *item.second) {
if (checker.dependsOn(item.first, narg))
actualNormSet->add(*narg);
// Add the arg from the actual node list in the computation graph.
// Like this, we don't accidentally add internal variable clones
// that the client args returned. Looking this up is fast because
// of the name pointer hash map optimization.
actualNormSet->add(*nodes.find(*narg));
}
delete item.second;
item.second = actualNormSet;
Expand Down
5 changes: 3 additions & 2 deletions roofit/roofitcore/src/RooFitDriver.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,10 @@ RooFitDriver::RooFitDriver(const RooAbsReal &absReal, RooArgSet const &normSet,
}

void RooFitDriver::setData(RooAbsData const &data, std::string_view rangeName,
RooAbsCategory const *indexCatForSplitting)
RooAbsCategory const *indexCatForSplitting, bool skipZeroWeights)
{
setData(RooFit::BatchModeDataHelpers::getDataSpans(data, rangeName, indexCatForSplitting, _vectorBuffers));
setData(RooFit::BatchModeDataHelpers::getDataSpans(data, rangeName, indexCatForSplitting, _vectorBuffers,
skipZeroWeights));
}

void RooFitDriver::setData(DataSpansMap const &dataSpans)
Expand Down