Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create new ECAL DQM GpuTask to monitor and compare CPU and GPU generated ECAL RECO objects #36742

Merged
merged 6 commits into from
Feb 22, 2022
26 changes: 25 additions & 1 deletion DQM/EcalMonitorTasks/interface/Collections.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ namespace ecaldqm {
kEEBasicCluster,
kEBSuperCluster,
kEESuperCluster,
kEBCpuDigi,
kEECpuDigi,
kEBGpuDigi,
kEEGpuDigi,
kEBCpuUncalibRecHit,
kEECpuUncalibRecHit,
kEBGpuUncalibRecHit,
kEEGpuUncalibRecHit,
kEBCpuRecHit,
kEECpuRecHit,
kEBGpuRecHit,
kEEGpuRecHit,
nCollections
};

Expand Down Expand Up @@ -78,7 +90,19 @@ namespace ecaldqm {
"EBBasicCluster",
"EEBasicCluster",
"EBSuperCluster",
"EESuperCluster"};
"EESuperCluster",
"EBCpuDigi",
"EECpuDigi",
"EBGpuDigi",
"EEGpuDigi",
"EBCpuUncalibRecHit",
"EECpuUncalibRecHit",
"EBGpuUncalibRecHit",
"EEGpuUncalibRecHit",
"EBCpuRecHit",
"EECpuRecHit",
"EBGpuRecHit",
"EEGpuRecHit"};

} // namespace ecaldqm

Expand Down
104 changes: 104 additions & 0 deletions DQM/EcalMonitorTasks/interface/GpuTask.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#ifndef DQM_EcalMonitorTasks_GpuTask_H
#define DQM_EcalMonitorTasks_GpuTask_H

#include "DQWorkerTask.h"

#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h"
#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"

namespace ecaldqm {

// DQM worker task that declares handlers for CPU- and GPU-produced ECAL
// collections: digis, uncalibrated rechits and rechits. Collections are
// delivered through analyze(), which dispatches to the matching runOn*()
// handler.
// NOTE(review): std::vector is used below, but <vector> is not among the
// includes visible at the top of this header; presumably it arrives
// transitively via DQWorkerTask.h -- confirm.
class GpuTask : public DQWorkerTask {
public:
GpuTask();
~GpuTask() override = default;

// DQWorkerTask overrides; their definitions are not part of this header.
void addDependencies(DependencySet&) override;

bool filterRunType(short const*) override;

void beginEvent(edm::Event const&, edm::EventSetup const&, bool const&, bool&) override;
// Type-erased entry point: receives a collection pointer plus the enum value
// identifying it (defined inline below).
bool analyze(void const*, Collections) override;

// Per-collection handlers invoked from analyze(). The digi handlers are
// templates because analyze() calls them with both EBDigiCollection and
// EEDigiCollection; the rechit handlers take one collection type each.
template <typename DigiCollection>
void runOnCpuDigis(DigiCollection const&, Collections);
template <typename DigiCollection>
void runOnGpuDigis(DigiCollection const&, Collections);
void runOnCpuUncalibRecHits(EcalUncalibratedRecHitCollection const&, Collections);
void runOnGpuUncalibRecHits(EcalUncalibratedRecHitCollection const&, Collections);
void runOnCpuRecHits(EcalRecHitCollection const&, Collections);
void runOnGpuRecHits(EcalRecHitCollection const&, Collections);

private:
void setParams(edm::ParameterSet const&) override;

// Master switch: analyze() calls a handler only when this is true, and
// returns this value for every collection it recognises.
bool runGpuTask_;
// NOTE(review): presumably selects whether only GPU-side plots are filled;
// its use is not visible in this header -- confirm in the .cc file.
bool gpuOnlyPlots_;
// NOTE(review): presumably a list of out-of-time amplitude indices for the
// uncalibrated rechits; verify against setParams().
std::vector<int> uncalibOOTAmps_;

// Non-owning pointers to the CPU-side collections.
// NOTE(review): presumably cached by the CPU handlers so the GPU handlers
// can compare against them -- confirm. No in-class initializers are given,
// so the constructor (defined elsewhere) must initialise these.
EBDigiCollection const* EBCpuDigis_;
EEDigiCollection const* EECpuDigis_;

EcalUncalibratedRecHitCollection const* EBCpuUncalibRecHits_;
EcalUncalibratedRecHitCollection const* EECpuUncalibRecHits_;

EcalRecHitCollection const* EBCpuRecHits_;
EcalRecHitCollection const* EECpuRecHits_;
};

// Dispatches a type-erased collection pointer to the handler matching
// `collection`.
//
// For each of the twelve CPU/GPU collection ids handled here, the handler is
// called only when `p` is non-null and runGpuTask_ is true, and the function
// returns runGpuTask_ whether or not the handler ran. Any other collection id
// falls through to the final `return false`.
//
// The `break` after each `return` is unreachable. The cast target must match
// the type the caller actually passed for that collection id; nothing in this
// function checks it.
inline bool GpuTask::analyze(void const* p, Collections collection) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CMS code rule 2.12 says to not use single character variable names except for loop indices. Please rename p.

switch (collection) {
// Digis: barrel (EB) and endcap (EE) use distinct collection types, so each
// id gets its own cast.
case kEBCpuDigi:
if (p && runGpuTask_)
runOnCpuDigis(*static_cast<EBDigiCollection const*>(p), collection);
return runGpuTask_;
break;
case kEECpuDigi:
if (p && runGpuTask_)
runOnCpuDigis(*static_cast<EEDigiCollection const*>(p), collection);
return runGpuTask_;
break;
case kEBGpuDigi:
if (p && runGpuTask_)
runOnGpuDigis(*static_cast<EBDigiCollection const*>(p), collection);
return runGpuTask_;
break;
case kEEGpuDigi:
if (p && runGpuTask_)
runOnGpuDigis(*static_cast<EEDigiCollection const*>(p), collection);
return runGpuTask_;
break;
// Uncalibrated rechits: EB and EE share one collection type, so the two ids
// share a case body; `collection` tells the handler which one it received.
case kEBCpuUncalibRecHit:
case kEECpuUncalibRecHit:
if (p && runGpuTask_)
runOnCpuUncalibRecHits(*static_cast<EcalUncalibratedRecHitCollection const*>(p), collection);
return runGpuTask_;
break;
case kEBGpuUncalibRecHit:
case kEEGpuUncalibRecHit:
if (p && runGpuTask_)
runOnGpuUncalibRecHits(*static_cast<EcalUncalibratedRecHitCollection const*>(p), collection);
return runGpuTask_;
break;
// Calibrated rechits: same EB/EE sharing as above.
case kEBCpuRecHit:
case kEECpuRecHit:
if (p && runGpuTask_)
runOnCpuRecHits(*static_cast<EcalRecHitCollection const*>(p), collection);
return runGpuTask_;
break;
case kEBGpuRecHit:
case kEEGpuRecHit:
if (p && runGpuTask_)
runOnGpuRecHits(*static_cast<EcalRecHitCollection const*>(p), collection);
return runGpuTask_;
break;
default:
break;
}

// Collection id not handled by this task.
return false;
}

} // namespace ecaldqm

#endif
12 changes: 12 additions & 0 deletions DQM/EcalMonitorTasks/plugins/EcalDQMonitorTask2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,14 @@ void EcalDQMonitorTask::formSchedule(std::vector<ecaldqm::Collections> const& _p
sch.first = &EcalDQMonitorTask::runOnCollection<EESrFlagCollection>;
break;
case ecaldqm::kEBDigi:
case ecaldqm::kEBCpuDigi:
case ecaldqm::kEBGpuDigi:
collectionTokens_[*colItr] = edm::EDGetToken(consumes<EBDigiCollection>(tag));
sch.first = &EcalDQMonitorTask::runOnCollection<EBDigiCollection>;
break;
case ecaldqm::kEEDigi:
case ecaldqm::kEECpuDigi:
case ecaldqm::kEEGpuDigi:
collectionTokens_[*colItr] = edm::EDGetToken(consumes<EEDigiCollection>(tag));
sch.first = &EcalDQMonitorTask::runOnCollection<EEDigiCollection>;
break;
Expand All @@ -112,13 +116,21 @@ void EcalDQMonitorTask::formSchedule(std::vector<ecaldqm::Collections> const& _p
case ecaldqm::kEELaserLedUncalibRecHit:
case ecaldqm::kEBTestPulseUncalibRecHit:
case ecaldqm::kEETestPulseUncalibRecHit:
case ecaldqm::kEBCpuUncalibRecHit:
case ecaldqm::kEECpuUncalibRecHit:
case ecaldqm::kEBGpuUncalibRecHit:
case ecaldqm::kEEGpuUncalibRecHit:
collectionTokens_[*colItr] = edm::EDGetToken(consumes<EcalUncalibratedRecHitCollection>(tag));
sch.first = &EcalDQMonitorTask::runOnCollection<EcalUncalibratedRecHitCollection>;
break;
case ecaldqm::kEBRecHit:
case ecaldqm::kEBReducedRecHit:
case ecaldqm::kEERecHit:
case ecaldqm::kEEReducedRecHit:
case ecaldqm::kEBCpuRecHit:
case ecaldqm::kEECpuRecHit:
case ecaldqm::kEBGpuRecHit:
case ecaldqm::kEEGpuRecHit:
collectionTokens_[*colItr] = edm::EDGetToken(consumes<EcalRecHitCollection>(tag));
sch.first = &EcalDQMonitorTask::runOnCollection<EcalRecHitCollection>;
break;
Expand Down
14 changes: 13 additions & 1 deletion DQM/EcalMonitorTasks/python/CollectionTags_cfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,17 @@
EBBasicCluster = cms.untracked.InputTag("particleFlowClusterECAL"),
EEBasicCluster = cms.untracked.InputTag("particleFlowClusterECAL"),
EBSuperCluster = cms.untracked.InputTag("particleFlowSuperClusterECAL", "particleFlowSuperClusterECALBarrel"),
EESuperCluster = cms.untracked.InputTag("particleFlowSuperClusterECAL", "particleFlowSuperClusterECALEndcapWithPreshower")
EESuperCluster = cms.untracked.InputTag("particleFlowSuperClusterECAL", "particleFlowSuperClusterECALEndcapWithPreshower"),
EBCpuDigi = cms.untracked.InputTag("ecalDigis@cpu", "ebDigis"),
EECpuDigi = cms.untracked.InputTag("ecalDigis@cpu", "eeDigis"),
EBGpuDigi = cms.untracked.InputTag("ecalDigis@cuda", "ebDigis"),
EEGpuDigi = cms.untracked.InputTag("ecalDigis@cuda", "eeDigis"),
EBCpuUncalibRecHit = cms.untracked.InputTag("ecalMultiFitUncalibRecHit@cpu", "EcalUncalibRecHitsEB"),
EECpuUncalibRecHit = cms.untracked.InputTag("ecalMultiFitUncalibRecHit@cpu", "EcalUncalibRecHitsEE"),
EBGpuUncalibRecHit = cms.untracked.InputTag("ecalMultiFitUncalibRecHit@cuda", "EcalUncalibRecHitsEB"),
EEGpuUncalibRecHit = cms.untracked.InputTag("ecalMultiFitUncalibRecHit@cuda", "EcalUncalibRecHitsEE"),
EBCpuRecHit = cms.untracked.InputTag("ecalRecHit@cpu", "EcalRecHitsEB"),
EECpuRecHit = cms.untracked.InputTag("ecalRecHit@cpu", "EcalRecHitsEE"),
EBGpuRecHit = cms.untracked.InputTag("ecalRecHit@cuda", "EcalRecHitsEB"),
EEGpuRecHit = cms.untracked.InputTag("ecalRecHit@cuda", "EcalRecHitsEE")
)
11 changes: 11 additions & 0 deletions DQM/EcalMonitorTasks/python/EcalMonitorTask_cff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import FWCore.ParameterSet.Config as cms

from DQM.EcalMonitorTasks.EcalMonitorTask_cfi import *

# Customization to run the CPU vs GPU comparison task if the job runs on a GPU enabled machine
from Configuration.ProcessModifiers.gpu_cff import gpu
from DQM.EcalMonitorTasks.GpuTask_cfi import ecalGpuTask
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you rename DQM/EcalMonitorTasks/python/GpuTask_cfi.py to DQM/EcalMonitorTasks/python/ecalGpuTask_cfi.py, so the file name matches the main object defined in it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure!


# When the `gpu` process modifier is active, set the task's runGpuTask parameter to True.
gpu.toModify(ecalGpuTask.params, runGpuTask = cms.untracked.bool(True))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that setting runGpuTask to True here will run the GPU vs. CPU comparison by default in the offline DQM whenever the gpu modifier is given. I do not think this is what we want, since it implies running both the CPU and the GPU reconstruction.

As discussed in #35879, we should use a different modifier that is only given when the GPU vs. CPU comparison should be done. @jfernan2, does such a modifier already exist from DQM? In the issue mentioned, it was proposed to call it gpu-validation.

# When the `gpu` process modifier is active, register the worker with the monitor task:
# append "GpuTask" to ecalMonitorTask.workers and supply ecalGpuTask as the GpuTask
# entry of workerParameters.
gpu.toModify(ecalMonitorTask.workers, func = lambda workers: workers.append("GpuTask"))
gpu.toModify(ecalMonitorTask, workerParameters = dict(GpuTask = ecalGpuTask))
Loading