From ba31ad1abaaee0a12dc6610df48ab9180b539f42 Mon Sep 17 00:00:00 2001
From: Jonas Rembser <jonas.rembser@cern.ch>
Date: Sat, 6 Aug 2022 17:16:50 +0200
Subject: [PATCH 1/2] [RF] Don't skip zero-weight events by default in new
 BatchMode

The new RooFit batchMode skipped zero-weight events to optimize the
likelihood calculation. However, this should not be done in general,
because it is unexpected to users is the output of batched computations
is not aligned with the original dataset.
---
 .../res/RooFit/BatchModeDataHelpers.h         |  7 ++-
 roofit/roofitcore/res/RooFitDriver.h          |  2 +-
 .../roofitcore/src/BatchModeDataHelpers.cxx   | 51 ++++++++++---------
 roofit/roofitcore/src/BatchModeHelpers.cxx    | 16 +++---
 roofit/roofitcore/src/RooFitDriver.cxx        |  5 +-
 5 files changed, 43 insertions(+), 38 deletions(-)
diff --git a/roofit/roofitcore/res/RooFit/BatchModeDataHelpers.h b/roofit/roofitcore/res/RooFit/BatchModeDataHelpers.h
index 38dbebe345228..c4ff48bc86886 100644
--- a/roofit/roofitcore/res/RooFit/BatchModeDataHelpers.h
+++ b/roofit/roofitcore/res/RooFit/BatchModeDataHelpers.h
@@ -30,10 +30,9 @@ class TNamed;
 namespace RooFit {
 namespace BatchModeDataHelpers {
 
-std::map<RooFit::Detail::DataKey, RooSpan<const double>> getDataSpans(RooAbsData const &data,
-                                                                      std::string_view rangeName,
-                                                                      RooAbsCategory const *indexCat,
-                                                                      std::stack<std::vector<double>> &buffers);
+std::map<RooFit::Detail::DataKey, RooSpan<const double>>
+getDataSpans(RooAbsData const &data, std::string_view rangeName, RooAbsCategory const *indexCat,
+             std::stack<std::vector<double>> &buffers, bool skipZeroWeights);
 
 } // namespace BatchModeDataHelpers
 } // namespace RooFit
diff --git a/roofit/roofitcore/res/RooFitDriver.h b/roofit/roofitcore/res/RooFitDriver.h
index 2485ac0080041..83e685b1ed350 100644
--- a/roofit/roofitcore/res/RooFitDriver.h
+++ b/roofit/roofitcore/res/RooFitDriver.h
@@ -50,7 +50,7 @@ class RooFitDriver {
                 RooFit::BatchModeOption batchMode = RooFit::BatchModeOption::Cpu);
 
    void setData(RooAbsData const &data, std::string_view rangeName = "",
-                RooAbsCategory const *indexCatForSplitting = nullptr);
+                RooAbsCategory const *indexCatForSplitting = nullptr, bool skipZeroWeights = false);
    void setData(DataSpansMap const &dataSpans);
 
    ~RooFitDriver();
diff --git a/roofit/roofitcore/src/BatchModeDataHelpers.cxx b/roofit/roofitcore/src/BatchModeDataHelpers.cxx
index b3f0a0e16a80a..3bc1f7c818260 100644
--- a/roofit/roofitcore/src/BatchModeDataHelpers.cxx
+++ b/roofit/roofitcore/src/BatchModeDataHelpers.cxx
@@ -22,8 +22,8 @@
 
 namespace {
 
-void splitByCategory(std::map<RooFit::Detail::DataKey, RooSpan<const double>> &dataSpans, RooAbsCategory const &category,
-                     std::stack<std::vector<double>> &buffers)
+void splitByCategory(std::map<RooFit::Detail::DataKey, RooSpan<const double>> &dataSpans,
+                     RooAbsCategory const &category, std::stack<std::vector<double>> &buffers)
 {
    std::stack<std::vector<double>> oldBuffers;
    std::swap(buffers, oldBuffers);
@@ -71,28 +71,33 @@ void splitByCategory(std::map<RooFit::Detail::DataKey, RooSpan<const double>> &d
 } // namespace
 
 ////////////////////////////////////////////////////////////////////////////////
-// Extract all content from a RooFit datasets as a map of spans.
-// Spans with the weights and squared weights will be also stored in the map,
-// keyed with the names `_weight` and the `_weight_sumW2`. If the dataset is
-// unweighted, these weight spans will only contain the single value `1.0`.
-// Entries with zero weight will be skipped.
-//
-// \return A `std::map` with spans keyed to name pointers.
-// \param[in] data The input dataset.
-// \param[in] rangeName Select only entries from the data in a given range
-//            (empty string for no range).
-// \param[in] indexCat If not `nullptr`, each span is spit up by this category,
-//            with the new names prefixed by the category component name
-//            surrounded by underscores. For example, if you have a category
-//            with `signal` and `control` samples, the span for a variable `x`
-//            will be split in two spans `_signal_x` and `_control_x`.
-// \param[in] buffers Pass here an empty stack of `double` vectors, which will
-//            be used as memory for the data if the memory in the dataset
-//            object can't be used directly (e.g. because you used the range
-//            selection or the splitting by categories).
+/// Extract all content from a RooFit datasets as a map of spans.
+/// Spans with the weights and squared weights will be also stored in the map,
+/// keyed with the names `_weight` and the `_weight_sumW2`. If the dataset is
+/// unweighted, these weight spans will only contain the single value `1.0`.
+/// Entries with zero weight will be skipped.
+///
+/// \return A `std::map` with spans keyed to name pointers.
+/// \param[in] data The input dataset.
+/// \param[in] rangeName Select only entries from the data in a given range
+///            (empty string for no range).
+/// \param[in] indexCat If not `nullptr`, each span is spit up by this category,
+///            with the new names prefixed by the category component name
+///            surrounded by underscores. For example, if you have a category
+///            with `signal` and `control` samples, the span for a variable `x`
+///            will be split in two spans `_signal_x` and `_control_x`.
+/// \param[in] buffers Pass here an empty stack of `double` vectors, which will
+///            be used as memory for the data if the memory in the dataset
+///            object can't be used directly (e.g. because you used the range
+///            selection or the splitting by categories).
+/// \param[in] skipZeroWeights Skip entries with zero weight when filling the
+///            data spans. Be very careful with enabling it, because the user
+///            might not expect that the batch results are not aligned with the
+///            original dataset anymore!
 std::map<RooFit::Detail::DataKey, RooSpan<const double>>
 RooFit::BatchModeDataHelpers::getDataSpans(RooAbsData const &data, std::string_view rangeName,
-                                           RooAbsCategory const *indexCat, std::stack<std::vector<double>> &buffers)
+                                           RooAbsCategory const *indexCat, std::stack<std::vector<double>> &buffers,
+                                           bool skipZeroWeights)
 {
    std::map<RooFit::Detail::DataKey, RooSpan<const double>> dataSpans; // output variable
 
@@ -137,7 +142,7 @@ RooFit::BatchModeDataHelpers::getDataSpans(RooAbsData const &data, std::string_v
          buffer.reserve(nEvents);
          bufferSumW2.reserve(nEvents);
          for (std::size_t i = 0; i < nEvents; ++i) {
-            if (weight[i] != 0) {
+            if (!skipZeroWeights || weight[i] != 0) {
                buffer.push_back(weight[i]);
                bufferSumW2.push_back(weightSumW2[i]);
                ++nNonZeroWeight;
diff --git a/roofit/roofitcore/src/BatchModeHelpers.cxx b/roofit/roofitcore/src/BatchModeHelpers.cxx
index 2530e0806733b..14246cd1bfb05 100644
--- a/roofit/roofitcore/src/BatchModeHelpers.cxx
+++ b/roofit/roofitcore/src/BatchModeHelpers.cxx
@@ -116,17 +116,17 @@ RooFit::BatchModeHelpers::createNLL(RooAbsPdf &pdf, RooAbsData &data, std::uniqu
    nll->addOwnedComponents(std::move(binSamplingPdfs));
    nll->addOwnedComponents(std::move(nllTerms));
 
+   RooAbsCategory const *indexCatForSplitting = nullptr;
    if (auto simPdf = dynamic_cast<RooSimultaneous *>(&finalPdf)) {
       RooArgSet parameters;
       pdf.getParameters(data.get(), parameters);
       nll->recursiveRedirectServers(parameters);
-      driver = std::make_unique<RooFitDriver>(*nll, observables, batchMode);
-      driver->setData(data, rangeName, &simPdf->indexCat());
-   } else {
-      driver = std::make_unique<RooFitDriver>(*nll, observables, batchMode);
-      driver->setData(data, rangeName);
+      indexCatForSplitting = &simPdf->indexCat();
    }
 
+   driver = std::make_unique<RooFitDriver>(*nll, observables, batchMode);
+   driver->setData(data, rangeName, indexCatForSplitting, /*skipZeroWeights=*/true);
+
    // Set the fitrange attribute so that RooPlot can automatically plot the fitting range by default
    if (!rangeName.empty()) {
 
@@ -219,11 +219,11 @@ class RooAbsRealWrapper final : public RooAbsReal {
 
    double defaultErrorLevel() const override { return _driver->topNode().defaultErrorLevel(); }
 
-   bool getParameters(const RooArgSet * observables, RooArgSet &outputSet,
-                      bool /*stripDisconnected=true*/) const override
+   bool
+   getParameters(const RooArgSet *observables, RooArgSet &outputSet, bool /*stripDisconnected=true*/) const override
    {
       outputSet.add(_parameters);
-      if(observables) {
+      if (observables) {
          outputSet.remove(*observables);
       }
       return false;
diff --git a/roofit/roofitcore/src/RooFitDriver.cxx b/roofit/roofitcore/src/RooFitDriver.cxx
index 07086ed4007b1..4f51e47f8bb0b 100644
--- a/roofit/roofitcore/src/RooFitDriver.cxx
+++ b/roofit/roofitcore/src/RooFitDriver.cxx
@@ -186,9 +186,10 @@ RooFitDriver::RooFitDriver(const RooAbsReal &absReal, RooArgSet const &normSet,
 }
 
 void RooFitDriver::setData(RooAbsData const &data, std::string_view rangeName,
-                           RooAbsCategory const *indexCatForSplitting)
+                           RooAbsCategory const *indexCatForSplitting, bool skipZeroWeights)
 {
-   setData(RooFit::BatchModeDataHelpers::getDataSpans(data, rangeName, indexCatForSplitting, _vectorBuffers));
+   setData(RooFit::BatchModeDataHelpers::getDataSpans(data, rangeName, indexCatForSplitting, _vectorBuffers,
+                                                      skipZeroWeights));
 }
 
 void RooFitDriver::setData(DataSpansMap const &dataSpans)

From 4cecb7e9fc4312091d17d393f22bc01ee18f5acf Mon Sep 17 00:00:00 2001
From: Jonas Rembser <jonas.rembser@cern.ch>
Date: Sat, 6 Aug 2022 17:22:14 +0200
Subject: [PATCH 2/2] [RF] Ensure norm set args are part of the graph in
 NormalizationHelpers

Add the arg from the actual node list in the computation graph. Like
this, we don't accidentally add internal variable clones that the client
args returned. Looking this up is fast because of the name pointer hash
map optimization.

This is done because it will prevent crashes in future RooFit
developments that yet have to be commited.
---
 roofit/roofitcore/src/NormalizationHelpers.cxx | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/roofit/roofitcore/src/NormalizationHelpers.cxx b/roofit/roofitcore/src/NormalizationHelpers.cxx
index 9e52c0c571b79..e0d72a5d376cb 100644
--- a/roofit/roofitcore/src/NormalizationHelpers.cxx
+++ b/roofit/roofitcore/src/NormalizationHelpers.cxx
@@ -170,8 +170,9 @@ void treeNodeServerListAndNormSets(const RooAbsArg &arg, RooAbsCollection &list,
          }
 
          auto differentSet = arg.fillNormSetForServer(normSet, *server);
-         if (differentSet)
+         if (differentSet) {
             differentSet->sort();
+         }
 
          auto &serverNormSet = differentSet ? *differentSet : normSet;
 
@@ -219,14 +220,17 @@ std::vector<std::unique_ptr<RooAbsArg>> unfoldIntegrals(RooAbsArg const &topNode
    treeNodeServerListAndNormSets(topNode, nodes, normSetSorted, normSets, checker);
 
    // Clean normsets of the variables that the arg does not depend on
-   // std::unordered_map<std::pair<RooAbsArg const*,RooAbsArg const*>,bool> dependsResults;
    for (auto &item : normSets) {
       if (!item.second || item.second->empty())
          continue;
       auto actualNormSet = new RooArgSet{};
       for (auto *narg : *item.second) {
          if (checker.dependsOn(item.first, narg))
-            actualNormSet->add(*narg);
+            // Add the arg from the actual node list in the computation graph.
+            // Like this, we don't accidentally add internal variable clones
+            // that the client args returned. Looking this up is fast because
+            // of the name pointer hash map optimization.
+            actualNormSet->add(*nodes.find(*narg));
       }
       delete item.second;
       item.second = actualNormSet;