diff --git a/Alignment/CommonAlignment/python/tools/trackselectionRefitting.py b/Alignment/CommonAlignment/python/tools/trackselectionRefitting.py
index f936131c428b5..0a15276f22dd1 100644
--- a/Alignment/CommonAlignment/python/tools/trackselectionRefitting.py
+++ b/Alignment/CommonAlignment/python/tools/trackselectionRefitting.py
@@ -363,10 +363,6 @@ def getSequence(process, collection,
## put the sequence together ##
###############################
- if "Fast" in TTRHBuilder:
- print("PixelCPEFast has been chosen, here we must include CUDAService first")
- process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi')
-
modules = []
src = collection
prevsrc = None
diff --git a/Configuration/StandardSequences/python/Accelerators_cff.py b/Configuration/StandardSequences/python/Accelerators_cff.py
index aeef6ea5f367b..f313750526119 100644
--- a/Configuration/StandardSequences/python/Accelerators_cff.py
+++ b/Configuration/StandardSequences/python/Accelerators_cff.py
@@ -4,3 +4,4 @@
# used in production
from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi import ProcessAcceleratorCUDA
+from HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi import ProcessAcceleratorROCm
diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm.h b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm.h
new file mode 100644
index 0000000000000..32d58300c3f78
--- /dev/null
+++ b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm.h
@@ -0,0 +1,5 @@
+#include "DataFormats/Common/interface/DeviceProduct.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/Portable/interface/Product.h"
+#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
+#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml
new file mode 100644
index 0000000000000..bc61b9fcdfdfa
--- /dev/null
+++ b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc b/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc
index 9ca33340f7036..80ac575ff2230 100644
--- a/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc
+++ b/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc
@@ -9,7 +9,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
class HcalDigisProducerGPU : public edm::stream::EDProducer {
@@ -97,8 +97,8 @@ HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps)
hf3_.stride = hcal::compute_stride(QIE11DigiCollection::MAXSAMPLES);
// preallocate pinned host memory only if CUDA is available
- edm::Service cs;
- if (cs and cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
hf01_.reserve(config_.maxChannelsF01HE);
hf5_.reserve(config_.maxChannelsF5HB);
hf3_.reserve(config_.maxChannelsF3HB);
diff --git a/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc b/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc
index 8cabcb9aa634e..5fd50199d248b 100644
--- a/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc
+++ b/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc
@@ -1,5 +1,4 @@
-#include
-
+#include "CUDADataFormats/Common/interface/Product.h"
#include "CondFormats/DataRecord/interface/HcalElectronicsMapRcd.h"
#include "DataFormats/FEDRawData/interface/FEDNumbering.h"
#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h"
@@ -8,11 +7,7 @@
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "DeclsForKernels.h"
#include "DecodeGPU.h"
diff --git a/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py b/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py
index 8ee21c2d055d3..673a479d8eb2b 100644
--- a/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py
+++ b/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py
@@ -1,11 +1,17 @@
import FWCore.ParameterSet.Config as cms
+import os
+
+from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
+
class ModuleTypeResolverAlpaka:
def __init__(self, accelerators, backend):
- # first element is used as the default is nothing is set
+ # first element is used as the default if nothing is set
self._valid_backends = []
if "gpu-nvidia" in accelerators:
self._valid_backends.append("cuda_async")
+ if "gpu-amd" in accelerators:
+ self._valid_backends.append("rocm_async")
if "cpu" in accelerators:
self._valid_backends.append("serial_sync")
if len(self._valid_backends) == 0:
@@ -45,26 +51,64 @@ class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
ProcessAcceleratorCUDA) define.
"""
def __init__(self):
- super(ProcessAcceleratorAlpaka,self).__init__()
+ super(ProcessAcceleratorAlpaka, self).__init__()
self._backend = None
+
# User-facing interface
def setBackend(self, backend):
self._backend = backend
+
# Framework-facing interface
def moduleTypeResolver(self, accelerators):
return ModuleTypeResolverAlpaka(accelerators, self._backend)
+
def apply(self, process, accelerators):
- if not hasattr(process, "AlpakaServiceSerialSync"):
+ # Propagate the AlpakaService messages through the MessageLogger
+ if not hasattr(process.MessageLogger, "AlpakaService"):
+ process.MessageLogger.AlpakaService = cms.untracked.PSet()
+
+ # Check if the CPU backend is available
+ try:
+ if not "cpu" in accelerators:
+ raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
- process.add_(AlpakaServiceSerialSync)
- if not hasattr(process, "AlpakaServiceCudaAsync"):
+ except:
+ # the CPU backend is not available, do not load the AlpakaServiceSerialSync
+ if hasattr(process, "AlpakaServiceSerialSync"):
+ del process.AlpakaServiceSerialSync
+ else:
+ # the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
+ if not hasattr(process, "AlpakaServiceSerialSync"):
+ process.add_(AlpakaServiceSerialSync)
+
+ # Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
+ try:
+ if not "gpu-nvidia" in accelerators:
+ raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
- process.add_(AlpakaServiceCudaAsync)
+ except:
+ # CUDA is not available, do not load the AlpakaServiceCudaAsync
+ if hasattr(process, "AlpakaServiceCudaAsync"):
+ del process.AlpakaServiceCudaAsync
+ else:
+ # CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
+ if not hasattr(process, "AlpakaServiceCudaAsync"):
+ process.add_(AlpakaServiceCudaAsync)
- if not hasattr(process.MessageLogger, "AlpakaService"):
- process.MessageLogger.AlpakaService = cms.untracked.PSet()
+ # Check if ROCm is available, and if the system has at least one usable AMD GPU
+ try:
+ if not "gpu-amd" in accelerators:
+ raise False
+ from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
+ except:
+ # ROCm is not available, do not load the AlpakaServiceROCmAsync
+ if hasattr(process, "AlpakaServiceROCmAsync"):
+ del process.AlpakaServiceROCmAsync
+ else:
+ # ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
+ if not hasattr(process, "AlpakaServiceROCmAsync"):
+ process.add_(AlpakaServiceROCmAsync)
- process.AlpakaServiceSerialSync.enabled = "cpu" in accelerators
- process.AlpakaServiceCudaAsync.enabled = "gpu-nvidia" in accelerators
+# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")
diff --git a/HeterogeneousCore/AlpakaCore/src/module_backend_config.cc b/HeterogeneousCore/AlpakaCore/src/module_backend_config.cc
index 995e954dd67fb..55b3ee5e11b30 100644
--- a/HeterogeneousCore/AlpakaCore/src/module_backend_config.cc
+++ b/HeterogeneousCore/AlpakaCore/src/module_backend_config.cc
@@ -17,7 +17,7 @@ namespace cms::alpakatools {
descAlpaka.addUntracked("backend", "")
->setComment(
"Alpaka backend for this module. Can be empty string (for the global default), 'serial_sync', or "
- "'cuda_async'");
+ " - depending on the architecture and available hardware - 'cuda_async', 'rocm_async'");
if (iDesc.defaultDescription()) {
if (iDesc.defaultDescription()->isLabelUnused(kPSetName)) {
diff --git a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc
index d06a6adc39168..fbc0777c03b99 100644
--- a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc
+++ b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc
@@ -16,12 +16,12 @@
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
namespace ALPAKA_ACCELERATOR_NAMESPACE {
@@ -31,11 +31,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
verbose_(config.getUntrackedParameter("verbose")) {
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// rely on the CUDAService to initialise the CUDA devices
- edm::Service cudaService;
+ edm::Service cuda;
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
// rely on the ROCmService to initialise the ROCm devices
- edm::Service rocmService;
+ edm::Service rocm;
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
// TODO from Andrea Bocci:
@@ -48,14 +48,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
}
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
- if (not cudaService->enabled()) {
+ if (not cuda or not cuda->enabled()) {
enabled_ = false;
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService";
return;
}
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
- if (not rocmService->enabled()) {
+ if (not rocm or not rocm->enabled()) {
enabled_ = false;
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService";
return;
diff --git a/HeterogeneousCore/CUDACore/README.md b/HeterogeneousCore/CUDACore/README.md
index 57224926e70ed..af7701f856e81 100644
--- a/HeterogeneousCore/CUDACore/README.md
+++ b/HeterogeneousCore/CUDACore/README.md
@@ -83,7 +83,14 @@ This page documents the CUDA integration within CMSSW
stream must synchronize with the work queued on other CUDA
streams (with CUDA events and `cudaStreamWaitEvent()`)
4. Outside of `acquire()`/`produce()`, CUDA API functions may be
- called only if `CUDAService::enabled()` returns `true`.
+ called only if the `CUDAService` implementation of the `CUDAInterface`
+ is available and `CUDAService::enabled()` returns `true`:
+ ```c++
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
+ // CUDA calls ca be made here
+ }
+ ```
* With point 3 it follows that in these cases multiple devices have
to be dealt with explicitly, as well as CUDA streams
diff --git a/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py b/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py
index a92d49f9014c8..0849c6180cc2f 100644
--- a/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py
+++ b/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py
@@ -2,29 +2,44 @@
import os
+from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
+
class ProcessAcceleratorCUDA(cms.ProcessAccelerator):
def __init__(self):
- super(ProcessAcceleratorCUDA,self).__init__()
+ super(ProcessAcceleratorCUDA, self).__init__()
self._label = "gpu-nvidia"
+
def labels(self):
- return [self._label]
+ return [ self._label ]
+
def enabledLabels(self):
- enabled = (os.system("cudaIsEnabled") == 0)
- if enabled:
- return self.labels()
- else:
- return []
- def apply(self, process, accelerators):
- if not hasattr(process, "CUDAService"):
- from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
- process.add_(CUDAService)
+ # Check if CUDA is available, and if the system has at least one usable device.
+ # These should be checked on each worker node, because it depends both
+ # on the architecture and on the actual hardware present in the machine.
+ status = PlatformStatus(os.waitstatus_to_exitcode(os.system("cudaIsEnabled")))
+ return self.labels() if status == PlatformStatus.Success else []
- if not hasattr(process.MessageLogger, "CUDAService"):
- process.MessageLogger.CUDAService = cms.untracked.PSet()
+ def apply(self, process, accelerators):
if self._label in accelerators:
- process.CUDAService.enabled = True
+ # Ensure that the CUDAService is loaded
+ if not hasattr(process, "CUDAService"):
+ from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
+ process.add_(CUDAService)
+
+ # Propagate the CUDAService messages through the MessageLogger
+ if not hasattr(process.MessageLogger, "CUDAService"):
+ process.MessageLogger.CUDAService = cms.untracked.PSet()
+
else:
- process.CUDAService.enabled = False
-
+ # Make sure the CUDAService is not loaded
+ if hasattr(process, "CUDAService"):
+ del process.CUDAService
+
+ # Drop the CUDAService messages from the MessageLogger
+ if hasattr(process.MessageLogger, "CUDAService"):
+ del process.MessageLogger.CUDAService
+
+
+# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorCUDA, "from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA import ProcessAcceleratorCUDA")
diff --git a/HeterogeneousCore/CUDACore/src/chooseDevice.cc b/HeterogeneousCore/CUDACore/src/chooseDevice.cc
index 3c1d253537679..a768cea02c5ca 100644
--- a/HeterogeneousCore/CUDACore/src/chooseDevice.cc
+++ b/HeterogeneousCore/CUDACore/src/chooseDevice.cc
@@ -1,17 +1,18 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/Exception.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "chooseDevice.h"
namespace cms::cuda {
int chooseDevice(edm::StreamID id) {
- edm::Service cudaService;
- if (not cudaService->enabled()) {
+ edm::Service cuda;
+ if (not cuda or not cuda->enabled()) {
cms::Exception ex("CUDAError");
- ex << "Unable to choose current device because CUDAService is disabled. If CUDAService was not explicitly\n"
- "disabled in the configuration, the probable cause is that there is no GPU or there is some problem\n"
- "in the CUDA runtime or drivers.";
+ ex << "Unable to choose current device because CUDAService is not preset or disabled.\n"
+ << "If CUDAService was not explicitly disabled in the configuration, the probable\n"
+ << "cause is that there is no GPU or there is some problem in the CUDA runtime or\n"
+ << "drivers.";
ex.addContext("Calling cms::cuda::chooseDevice()");
throw ex;
}
@@ -22,6 +23,6 @@ namespace cms::cuda {
// (and even then there is no load balancing).
//
// TODO: improve the "assignment" logic
- return id % cudaService->numberOfDevices();
+ return id % cuda->numberOfDevices();
}
} // namespace cms::cuda
diff --git a/HeterogeneousCore/CUDAServices/BuildFile.xml b/HeterogeneousCore/CUDAServices/BuildFile.xml
index a48e1c639eaf3..5bb08cd9726fb 100644
--- a/HeterogeneousCore/CUDAServices/BuildFile.xml
+++ b/HeterogeneousCore/CUDAServices/BuildFile.xml
@@ -1,12 +1,4 @@
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
diff --git a/HeterogeneousCore/CUDAServices/interface/CUDAInterface.h b/HeterogeneousCore/CUDAServices/interface/CUDAInterface.h
new file mode 100644
index 0000000000000..ef43d867ae908
--- /dev/null
+++ b/HeterogeneousCore/CUDAServices/interface/CUDAInterface.h
@@ -0,0 +1,19 @@
+#ifndef HeterogeneousCore_CUDAServices_interface_CUDAInterface
+#define HeterogeneousCore_CUDAServices_interface_CUDAInterface
+
+#include
+
+class CUDAInterface {
+public:
+ CUDAInterface() = default;
+ virtual ~CUDAInterface() = default;
+
+ virtual bool enabled() const = 0;
+
+ virtual int numberOfDevices() const = 0;
+
+ // Returns the (major, minor) CUDA compute capability of the given device.
+ virtual std::pair computeCapability(int device) const = 0;
+};
+
+#endif // HeterogeneousCore_CUDAServices_interface_CUDAInterface
diff --git a/HeterogeneousCore/CUDAServices/interface/CUDAService.h b/HeterogeneousCore/CUDAServices/interface/CUDAService.h
deleted file mode 100644
index 5c8472bc00951..0000000000000
--- a/HeterogeneousCore/CUDAServices/interface/CUDAService.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef HeterogeneousCore_CUDAServices_CUDAService_h
-#define HeterogeneousCore_CUDAServices_CUDAService_h
-
-#include
-#include
-
-#include "FWCore/Utilities/interface/StreamID.h"
-
-namespace edm {
- class ParameterSet;
- class ActivityRegistry;
- class ConfigurationDescriptions;
-} // namespace edm
-
-class CUDAService {
-public:
- CUDAService(edm::ParameterSet const& iConfig);
- ~CUDAService();
-
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
- bool enabled() const { return enabled_; }
-
- int numberOfDevices() const { return numberOfDevices_; }
-
- // major, minor
- std::pair computeCapability(int device) const { return computeCapabilities_.at(device); }
-
- // Returns the id of device with most free memory. If none is found, returns -1.
- int deviceWithMostFreeMemory() const;
-
-private:
- int numberOfDevices_ = 0;
- std::vector> computeCapabilities_;
- bool enabled_ = false;
- bool verbose_ = false;
-};
-
-namespace edm {
- namespace service {
- inline bool isProcessWideService(CUDAService const*) { return true; }
- } // namespace service
-} // namespace edm
-
-#endif
diff --git a/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml b/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
index 942a573f77515..4d33d5d73cb18 100644
--- a/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
+++ b/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
@@ -9,7 +9,8 @@
-
+
+
diff --git a/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc b/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc
index 6271b1cc0941b..f0a2a5e897a99 100644
--- a/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc
+++ b/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc
@@ -11,7 +11,7 @@
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/deviceAllocatorStatus.h"
@@ -37,10 +37,11 @@ class CUDAMonitoringService {
CUDAMonitoringService::CUDAMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
// make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
- edm::Service cudaService;
- if (!cudaService->enabled())
+ edm::Service cuda;
+ if (not cuda or not cuda->enabled())
return;
- numberOfDevices_ = cudaService->numberOfDevices();
+
+ numberOfDevices_ = cuda->numberOfDevices();
if (config.getUntrackedParameter("memoryConstruction")) {
registry.watchPostModuleConstruction(this, &CUDAMonitoringService::postModuleConstruction);
diff --git a/HeterogeneousCore/CUDAServices/src/CUDAService.cc b/HeterogeneousCore/CUDAServices/plugins/CUDAService.cc
similarity index 90%
rename from HeterogeneousCore/CUDAServices/src/CUDAService.cc
rename to HeterogeneousCore/CUDAServices/plugins/CUDAService.cc
index 972bd14629d2f..c0dc04ba008b9 100644
--- a/HeterogeneousCore/CUDAServices/src/CUDAService.cc
+++ b/HeterogeneousCore/CUDAServices/plugins/CUDAService.cc
@@ -2,7 +2,9 @@
#include
#include
#include
+#include
#include
+#include
#include
#include
@@ -16,7 +18,7 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/ResourceInformation.h"
#include "FWCore/Utilities/interface/ReusableObjectHolder.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h"
#include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cachingAllocators.h"
@@ -26,6 +28,34 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/nvmlCheck.h"
+class CUDAService : public CUDAInterface {
+public:
+ CUDAService(edm::ParameterSet const& config);
+ ~CUDAService() override;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ bool enabled() const final { return enabled_; }
+
+ int numberOfDevices() const final { return numberOfDevices_; }
+
+ // Return the (major, minor) CUDA compute capability of the given device.
+ std::pair computeCapability(int device) const final {
+ int size = computeCapabilities_.size();
+ if (device < 0 or device >= size) {
+ throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
+ std::to_string(size - 1));
+ }
+ return computeCapabilities_[device];
+ }
+
+private:
+ int numberOfDevices_ = 0;
+ std::vector> computeCapabilities_;
+ bool enabled_ = false;
+ bool verbose_ = false;
+};
+
void setCudaLimit(cudaLimit limit, const char* name, size_t request) {
// read the current device
int device;
@@ -91,6 +121,14 @@ constexpr unsigned int getCudaCoresPerSM(unsigned int major, unsigned int minor)
case 86: // SM 8.6: GA10x class
return 128;
+ // Ada Lovelace architectures
+ case 89: // SM 8.9: AD10x class
+ return 128;
+
+ // Hopper architecture
+ case 90: // SM 9.0: GH100 class
+ return 128;
+
// unknown architecture, return a default value
default:
return 64;
@@ -109,7 +147,7 @@ namespace {
auto streamPtr = cms::cuda::getStreamCache().get();
- std::vector > buffers;
+ std::vector> buffers;
buffers.reserve(bufferSizes.size());
for (auto size : bufferSizes) {
buffers.push_back(allocate(size, streamPtr.get()));
@@ -137,8 +175,7 @@ namespace {
/// Constructor
CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter("verbose")) {
- bool configEnabled = config.getUntrackedParameter("enabled");
- if (not configEnabled) {
+ if (not config.getUntrackedParameter("enabled")) {
edm::LogInfo("CUDAService") << "CUDAService disabled by configuration";
return;
}
@@ -386,8 +423,8 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU
// Preallocate buffers if asked to
auto const& allocator = config.getUntrackedParameter("allocator");
- devicePreallocate(numberOfDevices_, allocator.getUntrackedParameter >("devicePreallocate"));
- hostPreallocate(allocator.getUntrackedParameter >("hostPreallocate"));
+ devicePreallocate(numberOfDevices_, allocator.getUntrackedParameter>("devicePreallocate"));
+ hostPreallocate(allocator.getUntrackedParameter>("hostPreallocate"));
}
CUDAService::~CUDAService() {
@@ -431,34 +468,21 @@ void CUDAService::fillDescriptions(edm::ConfigurationDescriptions& descriptions)
"the default value.");
edm::ParameterSetDescription allocator;
- allocator.addUntracked >("devicePreallocate", std::vector{})
+ allocator.addUntracked>("devicePreallocate", std::vector{})
->setComment("Preallocates buffers of given bytes on all devices");
- allocator.addUntracked >("hostPreallocate", std::vector{})
+ allocator.addUntracked>("hostPreallocate", std::vector{})
->setComment("Preallocates buffers of given bytes on the host");
desc.addUntracked("allocator", allocator);
descriptions.add("CUDAService", desc);
}
-int CUDAService::deviceWithMostFreeMemory() const {
- // save the current device
- int currentDevice;
- cudaCheck(cudaGetDevice(¤tDevice));
+namespace edm {
+ namespace service {
+ inline bool isProcessWideService(CUDAService const*) { return true; }
+ } // namespace service
+} // namespace edm
- size_t maxFreeMemory = 0;
- int device = -1;
- for (int i = 0; i < numberOfDevices_; ++i) {
- size_t freeMemory, totalMemory;
- cudaSetDevice(i);
- cudaMemGetInfo(&freeMemory, &totalMemory);
- edm::LogPrint("CUDAService") << "CUDA device " << i << ": " << freeMemory / (1 << 20) << " MB free / "
- << totalMemory / (1 << 20) << " MB total memory";
- if (freeMemory > maxFreeMemory) {
- maxFreeMemory = freeMemory;
- device = i;
- }
- }
- // restore the current device
- cudaCheck(cudaSetDevice(currentDevice));
- return device;
-}
+#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
+using CUDAServiceMaker = edm::serviceregistry::ParameterSetMaker;
+DEFINE_FWK_SERVICE_MAKER(CUDAService, CUDAServiceMaker);
diff --git a/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc b/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc
index 5cbf1819618b4..5e7db50a953f6 100644
--- a/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc
+++ b/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc
@@ -1,8 +1,3 @@
-// -*- C++ -*-
-//
-// Package: HeterogeneousCore/CUDAServices
-// Class : NVProfilerService
-
#include
#include
#include
@@ -40,7 +35,7 @@
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/ProductKindOfType.h"
#include "FWCore/Utilities/interface/TimeOfDay.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"
using namespace std::string_literals;
@@ -310,8 +305,10 @@ NVProfilerService::NVProfilerService(edm::ParameterSet const& config, edm::Activ
: highlightModules_(config.getUntrackedParameter>("highlightModules")),
showModulePrefetching_(config.getUntrackedParameter("showModulePrefetching")),
skipFirstEvent_(config.getUntrackedParameter("skipFirstEvent")) {
- // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
- edm::Service cudaService;
+ // make sure that CUDA is initialised, and that the CUDAInterface destructor is called after this service's destructor
+ edm::Service cuda;
+ if (not cuda or not cuda->enabled())
+ return;
std::sort(highlightModules_.begin(), highlightModules_.end());
diff --git a/HeterogeneousCore/CUDAServices/plugins/plugins.cc b/HeterogeneousCore/CUDAServices/plugins/plugins.cc
deleted file mode 100644
index 169bfd19195f6..0000000000000
--- a/HeterogeneousCore/CUDAServices/plugins/plugins.cc
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
-#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
-
-DEFINE_FWK_SERVICE_MAKER(CUDAService, edm::serviceregistry::ParameterSetMaker);
diff --git a/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc b/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc
index 4bc852f3a60c4..97e0e2b10843d 100644
--- a/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc
+++ b/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc
@@ -1,10 +1,10 @@
#include "HeterogeneousCore/CUDAServices/interface/numberOfDevices.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
namespace cms::cuda {
int numberOfDevices() {
- edm::Service cs;
- return cs->enabled() ? cs->numberOfDevices() : 0;
+ edm::Service cuda;
+ return (cuda and cuda->enabled()) ? cuda->numberOfDevices() : 0;
}
} // namespace cms::cuda
diff --git a/HeterogeneousCore/CUDAServices/test/BuildFile.xml b/HeterogeneousCore/CUDAServices/test/BuildFile.xml
index 99219eef7e6fd..707f469ff941d 100644
--- a/HeterogeneousCore/CUDAServices/test/BuildFile.xml
+++ b/HeterogeneousCore/CUDAServices/test/BuildFile.xml
@@ -1,12 +1,14 @@
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
diff --git a/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp b/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp
index 3565a5e6ad48f..5cda281e4fc0f 100644
--- a/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp
+++ b/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp
@@ -6,26 +6,49 @@
#include
-#include "catch.hpp"
+#include
+
+#include
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h"
-#include "FWCore/PluginManager/interface/PluginManager.h"
-#include "FWCore/PluginManager/interface/standard.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
#include "FWCore/ServiceRegistry/interface/ServiceToken.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/ResourceInformation.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
namespace {
- CUDAService makeCUDAService(edm::ParameterSet ps) {
- auto desc = edm::ConfigurationDescriptions("Service", "CUDAService");
- CUDAService::fillDescriptions(desc);
- desc.validate(ps, "CUDAService");
- return CUDAService(ps);
+ std::string makeProcess(std::string const& name) {
+ return fmt::format(R"_(
+import FWCore.ParameterSet.Config as cms
+process = cms.Process('{}')
+)_",
+ name);
+ }
+
+ void addResourceInformationService(std::string& config) {
+ config += R"_(
+process.add_(cms.Service('ResourceInformationService'))
+ )_";
+ }
+
+ void addCUDAService(std::string& config, bool enabled = true) {
+ config += fmt::format(R"_(
+process.add_(cms.Service('CUDAService',
+ enabled = cms.untracked.bool({}),
+ verbose = cms.untracked.bool(True)
+))
+ )_",
+ enabled ? "True" : "False");
+ }
+
+ edm::ServiceToken getServiceToken(std::string const& config) {
+ std::unique_ptr params;
+ edm::makeParameterSets(config, params);
+ return edm::ServiceToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params)));
}
} // namespace
@@ -40,31 +63,27 @@ TEST_CASE("Tests of CUDAService", "[CUDAService]") {
<< ret << ") " << cudaGetErrorString(ret) << ". Running only tests not requiring devices.");
}
- // Make Service system available as CUDAService depends on ResourceInformationService
- std::vector psets;
- edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets);
+ std::string config = makeProcess("Test");
+ addCUDAService(config);
+ auto serviceToken = getServiceToken(config);
edm::ServiceRegistry::Operate operate(serviceToken);
- SECTION("CUDAService enabled") {
- edm::ParameterSet ps;
- ps.addUntrackedParameter("enabled", true);
- SECTION("Enabled only if there are CUDA capable GPUs") {
- auto cs = makeCUDAService(ps);
- if (deviceCount <= 0) {
- REQUIRE(cs.enabled() == false);
- WARN("CUDAService is disabled as there are no CUDA GPU devices");
- } else {
- REQUIRE(cs.enabled() == true);
- INFO("CUDAService is enabled");
- }
- }
-
+ SECTION("Enable the CUDAService only if there are CUDA capable GPUs") {
+ edm::Service cuda;
if (deviceCount <= 0) {
+ REQUIRE((not cuda or not cuda->enabled()));
+ WARN("CUDAService is not present, or disabled because there are no CUDA GPU devices");
return;
+ } else {
+ REQUIRE(cuda);
+ REQUIRE(cuda->enabled());
+ INFO("CUDAService is enabled");
}
+ }
- auto cs = makeCUDAService(ps);
+ SECTION("CUDAService enabled") {
int driverVersion = 0, runtimeVersion = 0;
+ edm::Service cuda;
ret = cudaDriverGetVersion(&driverVersion);
if (ret != cudaSuccess) {
FAIL("Unable to query the CUDA driver version from the CUDA runtime API: (" << ret << ") "
@@ -82,8 +101,8 @@ TEST_CASE("Tests of CUDAService", "[CUDAService]") {
// Test that the number of devices found by the service
// is the same as detected by the CUDA runtime API
- REQUIRE(cs.numberOfDevices() == deviceCount);
- WARN("Detected " << cs.numberOfDevices() << " CUDA Capable device(s)");
+ REQUIRE(cuda->numberOfDevices() == deviceCount);
+ WARN("Detected " << cuda->numberOfDevices() << " CUDA Capable device(s)");
// Test that the compute capabilities of each device
// are the same as detected by the CUDA runtime API
@@ -95,45 +114,25 @@ TEST_CASE("Tests of CUDAService", "[CUDAService]") {
<< cudaGetErrorString(ret));
}
- REQUIRE(deviceProp.major == cs.computeCapability(i).first);
- REQUIRE(deviceProp.minor == cs.computeCapability(i).second);
+ REQUIRE(deviceProp.major == cuda->computeCapability(i).first);
+ REQUIRE(deviceProp.minor == cuda->computeCapability(i).second);
INFO("Device " << i << ": " << deviceProp.name << "\n CUDA Capability Major/Minor version number: "
<< deviceProp.major << "." << deviceProp.minor);
}
}
- SECTION("CUDAService device free memory") {
- size_t mem = 0;
- int dev = -1;
- for (int i = 0; i < deviceCount; ++i) {
- size_t free, tot;
- cudaSetDevice(i);
- cudaMemGetInfo(&free, &tot);
- WARN("Device " << i << " memory total " << tot << " free " << free);
- if (free > mem) {
- mem = free;
- dev = i;
- }
- }
- WARN("Device with most free memory " << dev << "\n"
- << " as given by CUDAService " << cs.deviceWithMostFreeMemory());
- }
-
SECTION("With ResourceInformationService available") {
- edmplugin::PluginManager::configure(edmplugin::standard::config());
-
- std::string const config = R"_(import FWCore.ParameterSet.Config as cms
-process = cms.Process('Test')
-process.add_(cms.Service('ResourceInformationService'))
-)_";
- std::unique_ptr params;
- edm::makeParameterSets(config, params);
- edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params)));
- edm::ServiceRegistry::Operate operate2(tempToken);
-
- auto cs = makeCUDAService(edm::ParameterSet{});
- REQUIRE(cs.enabled());
+ std::string config = makeProcess("Test");
+ addResourceInformationService(config);
+ addCUDAService(config);
+ auto serviceToken = getServiceToken(config);
+ edm::ServiceRegistry::Operate operate(serviceToken);
+
+ edm::Service cuda;
+ REQUIRE(cuda);
+ REQUIRE(cuda->enabled());
edm::Service ri;
+ REQUIRE(ri);
REQUIRE(ri->gpuModels().size() > 0);
REQUIRE(ri->nvidiaDriverVersion().size() > 0);
REQUIRE(ri->cudaDriverVersion() == driverVersion);
@@ -141,11 +140,14 @@ process.add_(cms.Service('ResourceInformationService'))
}
}
- SECTION("Force to be disabled") {
- edm::ParameterSet ps;
- ps.addUntrackedParameter("enabled", false);
- auto cs = makeCUDAService(ps);
- REQUIRE(cs.enabled() == false);
- REQUIRE(cs.numberOfDevices() == 0);
+ SECTION("CUDAService disabled") {
+ std::string config = makeProcess("Test");
+ addCUDAService(config, false);
+ auto serviceToken = getServiceToken(config);
+ edm::ServiceRegistry::Operate operate(serviceToken);
+
+ edm::Service cuda;
+ REQUIRE(cuda->enabled() == false);
+ REQUIRE(cuda->numberOfDevices() == 0);
}
}
diff --git a/HeterogeneousCore/CUDAServices/test/test_main.cpp b/HeterogeneousCore/CUDAServices/test/test_main.cpp
index 0c7c351f437f5..417b2599ee8c5 100644
--- a/HeterogeneousCore/CUDAServices/test/test_main.cpp
+++ b/HeterogeneousCore/CUDAServices/test/test_main.cpp
@@ -1,2 +1,16 @@
#define CATCH_CONFIG_MAIN
-#include "catch.hpp"
+#include
+
+#include "FWCore/PluginManager/interface/PluginManager.h"
+#include "FWCore/PluginManager/interface/standard.h"
+
+class ServiceRegistryListener : public Catch::TestEventListenerBase {
+public:
+ using Catch::TestEventListenerBase::TestEventListenerBase; // inherit constructor
+
+ void testRunStarting(Catch::TestRunInfo const& testRunInfo) override {
+ edmplugin::PluginManager::configure(edmplugin::standard::config());
+ }
+};
+
+CATCH_REGISTER_LISTENER(ServiceRegistryListener);
diff --git a/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc b/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc
index 2778ed02f3ac6..09d85f6c1c47d 100644
--- a/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc
+++ b/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc
@@ -8,7 +8,7 @@
#include "CUDADataFormats/Common/interface/Product.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDATest/interface/Thing.h"
#include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
@@ -38,8 +38,8 @@ TestCUDAAnalyzerGPU::TestCUDAAnalyzerGPU(edm::ParameterSet const& iConfig)
srcToken_(consumes>(iConfig.getParameter("src"))),
minValue_(iConfig.getParameter("minValue")),
maxValue_(iConfig.getParameter("maxValue")) {
- edm::Service cs;
- if (cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
auto streamPtr = cms::cuda::getStreamCache().get();
gpuAlgo_ = std::make_unique(streamPtr.get());
}
diff --git a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc
index 9b6fe85636026..b8b3f9058d496 100644
--- a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc
+++ b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc
@@ -10,7 +10,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ContextState.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDATest/interface/Thing.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
@@ -42,8 +42,8 @@ TestCUDAProducerGPUEW::TestCUDAProducerGPUEW(edm::ParameterSet const& iConfig)
: label_{iConfig.getParameter("@module_label")},
srcToken_{consumes>(iConfig.getParameter("src"))},
dstToken_{produces>()} {
- edm::Service cs;
- if (cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
hostData_ = cms::cuda::make_host_noncached_unique();
}
}
diff --git a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc
index d1e4f94a30d96..80135880bd324 100644
--- a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc
+++ b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc
@@ -13,7 +13,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ContextState.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDATest/interface/Thing.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
@@ -49,8 +49,8 @@ TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask(edm::ParameterSet const& iC
: label_{iConfig.getParameter("@module_label")},
srcToken_{consumes>(iConfig.getParameter("src"))},
dstToken_{produces>()} {
- edm::Service cs;
- if (cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
hostData_ = cms::cuda::make_host_noncached_unique();
}
}
diff --git a/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled.cpp b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled.cpp
index b8847568f44e2..2af114d179a0c 100644
--- a/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled.cpp
+++ b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled.cpp
@@ -4,23 +4,28 @@
// CUDA headers
#include
+// CMSSW headers
+#include "HeterogeneousCore/Common/interface/PlatformStatus.h"
+
// local headers
#include "isCudaDeviceSupported.h"
-// returns EXIT_SUCCESS if at least one visible CUDA device can be used, or EXIT_FAILURE otherwise
+// returns PlatformStatus::Success if at least one visible CUDA device can be used,
+// or different failure codes depending on the problem.
int main() {
int devices = 0;
auto status = cudaGetDeviceCount(&devices);
if (status != cudaSuccess) {
- return EXIT_FAILURE;
+ // could not initialise the CUDA runtime
+ return PlatformStatus::RuntimeNotAvailable;
}
// check that at least one visible CUDA device can be used
for (int i = 0; i < devices; ++i) {
if (isCudaDeviceSupported(i))
- return EXIT_SUCCESS;
+ return PlatformStatus::Success;
}
- // no visible usable devices
- return EXIT_FAILURE;
+ // could not find any usable CUDA devices
+ return PlatformStatus::DevicesNotAvailable;
}
diff --git a/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp
index 92785c12af592..ba8d547a22442 100644
--- a/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp
+++ b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp
@@ -1,5 +1,10 @@
// C/C++ headers
#include
-// always returns EXIT_FAILURE
-int main() { return EXIT_FAILURE; }
+// CMSSW headers
+#include "HeterogeneousCore/Common/interface/PlatformStatus.h"
+
+int main() {
+ // CUDA is not available on this architecture, OS and compiler combination
+ return PlatformStatus::PlatformNotAvailable;
+}
diff --git a/HeterogeneousCore/Common/interface/PlatformStatus.h b/HeterogeneousCore/Common/interface/PlatformStatus.h
new file mode 100644
index 0000000000000..49b39a67b2290
--- /dev/null
+++ b/HeterogeneousCore/Common/interface/PlatformStatus.h
@@ -0,0 +1,13 @@
+#ifndef HeterogeneousCore_Common_interface_PlatformStatus_h
+#define HeterogeneousCore_Common_interface_PlatformStatus_h
+
+// Please note: these values must be kept in sync with HeterogeneousCore/Common/python/PlatformStatus.py
+
+enum PlatformStatus : int {
+ Success = 0,
+ PlatformNotAvailable = 1, // the platform is not available for this architecture, OS or compiler
+ RuntimeNotAvailable = 2, // the runtime could not be initialised
+ DevicesNotAvailable = 3, // there are no visible, usable devices
+};
+
+#endif // HeterogeneousCore_Common_interface_PlatformStatus_h
diff --git a/HeterogeneousCore/Common/python/PlatformStatus.py b/HeterogeneousCore/Common/python/PlatformStatus.py
new file mode 100644
index 0000000000000..17d69f5e752c4
--- /dev/null
+++ b/HeterogeneousCore/Common/python/PlatformStatus.py
@@ -0,0 +1,9 @@
+import enum
+
+# Please note: these values must be kept in sync with HeterogeneousCore/Common/interface/PlatformStatus.h
+
+class PlatformStatus(enum.IntEnum):
+ Success = 0
+ PlatformNotAvailable = 1 # the platform is not available for this architecture, OS or compiler
+ RuntimeNotAvailable = 2 # the runtime could not be initialised
+ DevicesNotAvailable = 3 # there are no visible, usable devices
diff --git a/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py b/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py
new file mode 100644
index 0000000000000..81121f3610e06
--- /dev/null
+++ b/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py
@@ -0,0 +1,45 @@
+import FWCore.ParameterSet.Config as cms
+
+import os
+
+from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
+
+class ProcessAcceleratorROCm(cms.ProcessAccelerator):
+ def __init__(self):
+ super(ProcessAcceleratorROCm, self).__init__()
+ self._label = "gpu-amd"
+
+ def labels(self):
+ return [ self._label ]
+
+ def enabledLabels(self):
+ # Check if ROCm is available, and if the system has at least one usable device.
+ # These should be checked on each worker node, because it depends both
+ # on the architecture and on the actual hardware present in the machine.
+ status = PlatformStatus(os.waitstatus_to_exitcode(os.system("rocmIsEnabled")))
+ return self.labels() if status == PlatformStatus.Success else []
+
+ def apply(self, process, accelerators):
+
+ if self._label in accelerators:
+ # Ensure that the ROCmService is loaded
+ if not hasattr(process, "ROCmService"):
+ from HeterogeneousCore.ROCmServices.ROCmService_cfi import ROCmService
+ process.add_(ROCmService)
+
+ # Propagate the ROCmService messages through the MessageLogger
+ if not hasattr(process.MessageLogger, "ROCmService"):
+ process.MessageLogger.ROCmService = cms.untracked.PSet()
+
+ else:
+ # Make sure the ROCmService is not loaded
+ if hasattr(process, "ROCmService"):
+ del process.ROCmService
+
+ # Drop the ROCmService messages from the MessageLogger
+ if hasattr(process.MessageLogger, "ROCmService"):
+ del process.MessageLogger.ROCmService
+
+
+# Ensure this module is kept in the configuration when dumping it
+cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorROCm, "from HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm import ProcessAcceleratorROCm")
diff --git a/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm_cfi.py b/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm_cfi.py
new file mode 100644
index 0000000000000..4d6fa00fc84d6
--- /dev/null
+++ b/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm_cfi.py
@@ -0,0 +1,3 @@
+from HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm import ProcessAcceleratorROCm as _ProcessAcceleratorROCm
+
+ProcessAcceleratorROCm = _ProcessAcceleratorROCm()
diff --git a/HeterogeneousCore/ROCmServices/BuildFile.xml b/HeterogeneousCore/ROCmServices/BuildFile.xml
index 0ff47a94f4ebc..061859fab7928 100644
--- a/HeterogeneousCore/ROCmServices/BuildFile.xml
+++ b/HeterogeneousCore/ROCmServices/BuildFile.xml
@@ -1,11 +1,5 @@
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/interface/ROCmInterface.h b/HeterogeneousCore/ROCmServices/interface/ROCmInterface.h
new file mode 100644
index 0000000000000..b7e20b1dd081c
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/interface/ROCmInterface.h
@@ -0,0 +1,19 @@
+#ifndef HeterogeneousCore_ROCmServices_interface_ROCmInterface_h
+#define HeterogeneousCore_ROCmServices_interface_ROCmInterface_h
+
+#include
+
+class ROCmInterface {
+public:
+ ROCmInterface() = default;
+ virtual ~ROCmInterface() = default;
+
+ virtual bool enabled() const = 0;
+
+ virtual int numberOfDevices() const = 0;
+
+ // Returns the (major, minor) compute capability of the given device.
+ virtual std::pair computeCapability(int device) const = 0;
+};
+
+#endif // HeterogeneousCore_ROCmServices_interface_ROCmInterface_h
diff --git a/HeterogeneousCore/ROCmServices/interface/ROCmService.h b/HeterogeneousCore/ROCmServices/interface/ROCmService.h
deleted file mode 100644
index c78ec27f51d80..0000000000000
--- a/HeterogeneousCore/ROCmServices/interface/ROCmService.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef HeterogeneousCore_ROCmServices_interface_ROCmService_h
-#define HeterogeneousCore_ROCmServices_interface_ROCmService_h
-
-#include
-#include
-
-#include "FWCore/Utilities/interface/StreamID.h"
-
-namespace edm {
- class ParameterSet;
- class ActivityRegistry;
- class ConfigurationDescriptions;
-} // namespace edm
-
-class ROCmService {
-public:
- ROCmService(edm::ParameterSet const& config);
- ~ROCmService();
-
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
- bool enabled() const { return enabled_; }
-
- int numberOfDevices() const { return numberOfDevices_; }
-
- // major, minor
- std::pair computeCapability(int device) const { return computeCapabilities_.at(device); }
-
- // Returns the id of device with most free memory. If none is found, returns -1.
- int deviceWithMostFreeMemory() const;
-
-private:
- int numberOfDevices_ = 0;
- std::vector> computeCapabilities_;
- bool enabled_ = false;
- bool verbose_ = false;
-};
-
-namespace edm {
- namespace service {
- inline bool isProcessWideService(ROCmService const*) { return true; }
- } // namespace service
-} // namespace edm
-
-#endif // HeterogeneousCore_ROCmServices_interface_ROCmService_h
diff --git a/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
index 42f9e3024fc2f..11220ef4c4fce 100644
--- a/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
+++ b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
@@ -4,9 +4,10 @@
+
-
+
diff --git a/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
index 3bd0f2448f1b4..ff6588292c06f 100644
--- a/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
+++ b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
@@ -11,7 +11,7 @@
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
namespace edm {
@@ -36,10 +36,11 @@ class ROCmMonitoringService {
ROCmMonitoringService::ROCmMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
// make sure that ROCm is initialised, and that the ROCmService destructor is called after this service's destructor
- edm::Service rocmService;
- if (!rocmService->enabled())
+ edm::Service service;
+ if (not service or not service->enabled())
return;
- numberOfDevices_ = rocmService->numberOfDevices();
+
+ numberOfDevices_ = service->numberOfDevices();
if (config.getUntrackedParameter("memoryConstruction")) {
registry.watchPostModuleConstruction(this, &ROCmMonitoringService::postModuleConstruction);
diff --git a/HeterogeneousCore/ROCmServices/src/ROCmService.cc b/HeterogeneousCore/ROCmServices/plugins/ROCmService.cc
similarity index 91%
rename from HeterogeneousCore/ROCmServices/src/ROCmService.cc
rename to HeterogeneousCore/ROCmServices/plugins/ROCmService.cc
index 2cabaed127d99..d8e598e8cad15 100644
--- a/HeterogeneousCore/ROCmServices/src/ROCmService.cc
+++ b/HeterogeneousCore/ROCmServices/plugins/ROCmService.cc
@@ -16,12 +16,40 @@
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/ResourceInformation.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
/*
#include "HeterogeneousCore/ROCmUtilities/interface/nvmlCheck.h"
*/
+class ROCmService : public ROCmInterface {
+public:
+ ROCmService(edm::ParameterSet const& config);
+ ~ROCmService() override;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ bool enabled() const final { return enabled_; }
+
+ int numberOfDevices() const final { return numberOfDevices_; }
+
+ // Return the (major, minor) compute capability of the given device.
+ std::pair computeCapability(int device) const final {
+ int size = computeCapabilities_.size();
+ if (device < 0 or device >= size) {
+ throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
+ std::to_string(size - 1));
+ }
+ return computeCapabilities_[device];
+ }
+
+private:
+ int numberOfDevices_ = 0;
+ std::vector> computeCapabilities_;
+ bool enabled_ = false;
+ bool verbose_ = false;
+};
+
void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
// read the current device
int device;
@@ -50,8 +78,7 @@ std::string decodeVersion(int version) {
/// Constructor
ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter("verbose")) {
- bool configEnabled = config.getUntrackedParameter("enabled");
- if (not configEnabled) {
+ if (not config.getUntrackedParameter("enabled")) {
edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
return;
}
@@ -358,25 +385,12 @@ void ROCmService::fillDescriptions(edm::ConfigurationDescriptions& descriptions)
descriptions.add("ROCmService", desc);
}
-int ROCmService::deviceWithMostFreeMemory() const {
- // save the current device
- int currentDevice;
- hipCheck(hipGetDevice(¤tDevice));
+namespace edm {
+ namespace service {
+ inline bool isProcessWideService(ROCmService const*) { return true; }
+ } // namespace service
+} // namespace edm
- size_t maxFreeMemory = 0;
- int device = -1;
- for (int i = 0; i < numberOfDevices_; ++i) {
- size_t freeMemory, totalMemory;
- hipCheck(hipSetDevice(i));
- hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
- edm::LogPrint("ROCmService") << "ROCm device " << i << ": " << freeMemory / (1 << 20) << " MB free / "
- << totalMemory / (1 << 20) << " MB total memory";
- if (freeMemory > maxFreeMemory) {
- maxFreeMemory = freeMemory;
- device = i;
- }
- }
- // restore the current device
- hipCheck(hipSetDevice(currentDevice));
- return device;
-}
+#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
+using ROCmServiceMaker = edm::serviceregistry::ParameterSetMaker;
+DEFINE_FWK_SERVICE_MAKER(ROCmService, ROCmServiceMaker);
diff --git a/HeterogeneousCore/ROCmServices/plugins/plugins.cc b/HeterogeneousCore/ROCmServices/plugins/plugins.cc
deleted file mode 100644
index a418eeced333f..0000000000000
--- a/HeterogeneousCore/ROCmServices/plugins/plugins.cc
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
-#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
-
-DEFINE_FWK_SERVICE_MAKER(ROCmService, edm::serviceregistry::ParameterSetMaker);
diff --git a/HeterogeneousCore/ROCmServices/test/BuildFile.xml b/HeterogeneousCore/ROCmServices/test/BuildFile.xml
index 7fbe8d1931848..65d9bf58ca565 100644
--- a/HeterogeneousCore/ROCmServices/test/BuildFile.xml
+++ b/HeterogeneousCore/ROCmServices/test/BuildFile.xml
@@ -1,12 +1,14 @@
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
diff --git a/HeterogeneousCore/ROCmServices/test/testROCmService.cpp b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
index 06b2c90c6db8b..343b8150f2f6d 100644
--- a/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
+++ b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
@@ -6,28 +6,49 @@
#include
-#define CATCH_CONFIG_MAIN
-#include "catch.hpp"
+#include
+
+#include
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h"
-#include "FWCore/PluginManager/interface/PluginManager.h"
-#include "FWCore/PluginManager/interface/standard.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
#include "FWCore/ServiceRegistry/interface/ServiceToken.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/ResourceInformation.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
-#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
namespace {
- ROCmService makeROCmService(edm::ParameterSet ps) {
- auto desc = edm::ConfigurationDescriptions("Service", "ROCmService");
- ROCmService::fillDescriptions(desc);
- desc.validate(ps, "ROCmService");
- return ROCmService(ps);
+ std::string makeProcess(std::string const& name) {
+ return fmt::format(R"_(
+import FWCore.ParameterSet.Config as cms
+process = cms.Process('{}')
+)_",
+ name);
+ }
+
+ void addResourceInformationService(std::string& config) {
+ config += R"_(
+process.add_(cms.Service('ResourceInformationService'))
+ )_";
+ }
+
+ void addROCmService(std::string& config, bool enabled = true) {
+ config += fmt::format(R"_(
+process.add_(cms.Service('ROCmService',
+ enabled = cms.untracked.bool({}),
+ verbose = cms.untracked.bool(True)
+))
+ )_",
+ enabled ? "True" : "False");
+ }
+
+ edm::ServiceToken getServiceToken(std::string const& config) {
+ std::unique_ptr params;
+ edm::makeParameterSets(config, params);
+ return edm::ServiceToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params)));
}
} // namespace
@@ -42,31 +63,27 @@ TEST_CASE("Tests of ROCmService", "[ROCmService]") {
<< ret << ") " << hipGetErrorString(ret) << ". Running only tests not requiring devices.");
}
- // Make Service system available as ROCmService depends on ResourceInformationService
- std::vector psets;
- edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets);
+ std::string config = makeProcess("Test");
+ addROCmService(config);
+ auto serviceToken = getServiceToken(config);
edm::ServiceRegistry::Operate operate(serviceToken);
- SECTION("ROCmService enabled") {
- edm::ParameterSet ps;
- ps.addUntrackedParameter("enabled", true);
- SECTION("Enabled only if there are ROCm capable GPUs") {
- auto cs = makeROCmService(ps);
- if (deviceCount <= 0) {
- REQUIRE(cs.enabled() == false);
- WARN("ROCmService is disabled as there are no ROCm GPU devices");
- } else {
- REQUIRE(cs.enabled() == true);
- INFO("ROCmService is enabled");
- }
- }
-
+ SECTION("Enable the ROCmService only if there are ROCm capable GPUs") {
+ edm::Service service;
if (deviceCount <= 0) {
+ REQUIRE((not service or not service->enabled()));
+ WARN("ROCmService is not present, or disabled because there are no ROCm GPU devices");
return;
+ } else {
+ REQUIRE(service);
+ REQUIRE(service->enabled());
+ INFO("ROCmService is enabled");
}
+ }
- auto cs = makeROCmService(ps);
+ SECTION("ROCmService enabled") {
int driverVersion = 0, runtimeVersion = 0;
+ edm::Service service;
ret = hipDriverGetVersion(&driverVersion);
if (ret != hipSuccess) {
FAIL("Unable to query the ROCm driver version from the ROCm runtime API: (" << ret << ") "
@@ -84,8 +101,8 @@ TEST_CASE("Tests of ROCmService", "[ROCmService]") {
// Test that the number of devices found by the service
// is the same as detected by the ROCm runtime API
- REQUIRE(cs.numberOfDevices() == deviceCount);
- WARN("Detected " << cs.numberOfDevices() << " ROCm Capable device(s)");
+ REQUIRE(service->numberOfDevices() == deviceCount);
+ WARN("Detected " << service->numberOfDevices() << " ROCm Capable device(s)");
// Test that the compute capabilities of each device
// are the same as detected by the ROCm runtime API
@@ -97,59 +114,41 @@ TEST_CASE("Tests of ROCmService", "[ROCmService]") {
<< hipGetErrorString(ret));
}
- REQUIRE(deviceProp.major == cs.computeCapability(i).first);
- REQUIRE(deviceProp.minor == cs.computeCapability(i).second);
+ REQUIRE(deviceProp.major == service->computeCapability(i).first);
+ REQUIRE(deviceProp.minor == service->computeCapability(i).second);
INFO("Device " << i << ": " << deviceProp.name << "\n ROCm Capability Major/Minor version number: "
<< deviceProp.major << "." << deviceProp.minor);
}
}
- SECTION("ROCmService device free memory") {
- size_t mem = 0;
- int dev = -1;
- for (int i = 0; i < deviceCount; ++i) {
- size_t free, tot;
- REQUIRE_NOTHROW(hipCheck(hipSetDevice(i)));
- REQUIRE_NOTHROW(hipCheck(hipMemGetInfo(&free, &tot)));
- WARN("Device " << i << " memory total " << tot << " free " << free);
- if (free > mem) {
- mem = free;
- dev = i;
- }
- }
- WARN("Device with most free memory " << dev << "\n"
- << " as given by ROCmService " << cs.deviceWithMostFreeMemory());
- }
-
SECTION("With ResourceInformationService available") {
- edmplugin::PluginManager::configure(edmplugin::standard::config());
-
- std::string const config = R"_(import FWCore.ParameterSet.Config as cms
-process = cms.Process('Test')
-process.add_(cms.Service('ResourceInformationService'))
-)_";
- std::unique_ptr params;
- edm::makeParameterSets(config, params);
- edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params)));
- edm::ServiceRegistry::Operate operate2(tempToken);
-
- auto cs = makeROCmService(edm::ParameterSet{});
- REQUIRE(cs.enabled());
+ std::string config = makeProcess("Test");
+ addResourceInformationService(config);
+ addROCmService(config);
+ auto serviceToken = getServiceToken(config);
+ edm::ServiceRegistry::Operate operate(serviceToken);
+
+ edm::Service service;
+ REQUIRE(service);
+ REQUIRE(service->enabled());
edm::Service ri;
REQUIRE(ri->gpuModels().size() > 0);
/*
- REQUIRE(ri->nvidiaDriverVersion().size() > 0);
- REQUIRE(ri->cudaDriverVersion() == driverVersion);
- REQUIRE(ri->cudaRuntimeVersion() == runtimeVersion);
+ REQUIRE(ri->amdDriverVersion().size() > 0);
+ REQUIRE(ri->rocmDriverVersion() == driverVersion);
+ REQUIRE(ri->rocmRuntimeVersion() == runtimeVersion);
*/
}
}
SECTION("Force to be disabled") {
- edm::ParameterSet ps;
- ps.addUntrackedParameter("enabled", false);
- auto cs = makeROCmService(ps);
- REQUIRE(cs.enabled() == false);
- REQUIRE(cs.numberOfDevices() == 0);
+ std::string config = makeProcess("Test");
+ addROCmService(config, false);
+ auto serviceToken = getServiceToken(config);
+ edm::ServiceRegistry::Operate operate(serviceToken);
+
+ edm::Service service;
+ REQUIRE(service->enabled() == false);
+ REQUIRE(service->numberOfDevices() == 0);
}
}
diff --git a/HeterogeneousCore/ROCmServices/test/test_main.cpp b/HeterogeneousCore/ROCmServices/test/test_main.cpp
new file mode 100644
index 0000000000000..417b2599ee8c5
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/test_main.cpp
@@ -0,0 +1,16 @@
+#define CATCH_CONFIG_MAIN
+#include
+
+#include "FWCore/PluginManager/interface/PluginManager.h"
+#include "FWCore/PluginManager/interface/standard.h"
+
+class ServiceRegistryListener : public Catch::TestEventListenerBase {
+public:
+ using Catch::TestEventListenerBase::TestEventListenerBase; // inherit constructor
+
+ void testRunStarting(Catch::TestRunInfo const& testRunInfo) override {
+ edmplugin::PluginManager::configure(edmplugin::standard::config());
+ }
+};
+
+CATCH_REGISTER_LISTENER(ServiceRegistryListener);
diff --git a/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp
index 9fb97efbcc745..15a66b2e0d36d 100644
--- a/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp
+++ b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp
@@ -4,23 +4,28 @@
// ROCm headers
#include
+// CMSSW headers
+#include "HeterogeneousCore/Common/interface/PlatformStatus.h"
+
// local headers
#include "isRocmDeviceSupported.h"
-// returns EXIT_SUCCESS if at least one visible ROCm device can be used, or EXIT_FAILURE otherwise
+// returns PlatformStatus::Success if at least one visible ROCm device can be used,
+// or different failure codes depending on the problem.
int main() {
int devices = 0;
auto status = hipGetDeviceCount(&devices);
if (status != hipSuccess) {
- return EXIT_FAILURE;
+ // could not initialise the ROCm runtime
+ return PlatformStatus::RuntimeNotAvailable;
}
// check that at least one visible ROCm device can be used
for (int i = 0; i < devices; ++i) {
if (isRocmDeviceSupported(i))
- return EXIT_SUCCESS;
+ return PlatformStatus::Success;
}
- // no visible usable devices
- return EXIT_FAILURE;
+ // no usable ROCm devices were found
+ return PlatformStatus::DevicesNotAvailable;
}
diff --git a/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp
index 92785c12af592..6da07162a0bf6 100644
--- a/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp
+++ b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp
@@ -1,5 +1,10 @@
// C/C++ headers
#include
-// always returns EXIT_FAILURE
-int main() { return EXIT_FAILURE; }
+// CMSSW headers
+#include "HeterogeneousCore/Common/interface/PlatformStatus.h"
+
+int main() {
+ // ROCm is not available on this architecture, OS and compiler combination
+ return PlatformStatus::PlatformNotAvailable;
+}
diff --git a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc
index 8e10f2f50499e..c5d7f7ac272be 100644
--- a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc
+++ b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc
@@ -13,7 +13,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "CUDATestDeviceAdditionAlgo.h"
@@ -42,9 +42,9 @@ void CUDATestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptio
void CUDATestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require CUDA for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The CUDAService is disabled, the test will be skipped.\n";
+ edm::Service cuda;
+ if (not cuda or not cuda->enabled()) {
+ std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py b/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py
index 60dc51b9e9733..2d5c232f73e95 100644
--- a/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py
+++ b/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestCUDATestDeviceAdditionModule')
+process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi')
process.source = cms.Source('EmptySource')
-process.CUDAService = cms.Service('CUDAService')
-
process.cudaTestDeviceAdditionModule = cms.EDAnalyzer('CUDATestDeviceAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc
index de7eba987698e..666e9acd537ca 100644
--- a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc
+++ b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc
@@ -13,7 +13,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "CUDATestKernelAdditionAlgo.h"
@@ -42,9 +42,9 @@ void CUDATestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptio
void CUDATestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require CUDA for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The CUDAService is disabled, the test will be skipped.\n";
+ edm::Service service;
+ if (not service or not service->enabled()) {
+ std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py b/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py
index 229d40231681e..708a1c6a7d7ba 100644
--- a/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py
+++ b/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestCUDATestKernelAdditionModule')
+process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi')
process.source = cms.Source('EmptySource')
-process.CUDAService = cms.Service('CUDAService')
-
process.cudaTestKernelAdditionModule = cms.EDAnalyzer('CUDATestKernelAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc b/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc
index a4dabe7060155..bf60b97b48eb9 100644
--- a/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc
+++ b/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc
@@ -11,7 +11,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousTest/CUDAOpaque/interface/DeviceAdditionOpaque.h"
class CUDATestOpaqueAdditionModule : public edm::global::EDAnalyzer<> {
@@ -38,9 +38,9 @@ void CUDATestOpaqueAdditionModule::fillDescriptions(edm::ConfigurationDescriptio
void CUDATestOpaqueAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require CUDA for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The CUDAService is disabled, the test will be skipped.\n";
+ edm::Service cuda;
+ if (not cuda or not cuda->enabled()) {
+ std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py b/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py
index 60d0183823b0f..16eb7c9f248f4 100644
--- a/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py
+++ b/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestCUDATestOpaqueAdditionModule')
+process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi')
process.source = cms.Source('EmptySource')
-process.CUDAService = cms.Service('CUDAService')
-
process.cudaTestDeviceAdditionModule = cms.EDAnalyzer('CUDATestDeviceAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py b/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py
index 244e283ec7c3e..1f54cfd812252 100644
--- a/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py
+++ b/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestCUDATestOpaqueAdditionModule')
+process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi')
process.source = cms.Source('EmptySource')
-process.CUDAService = cms.Service('CUDAService')
-
process.cudaTestOpaqueAdditionModule = cms.EDAnalyzer('CUDATestOpaqueAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc b/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc
index 46b1cd2d3761e..fb70aba2474d4 100644
--- a/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc
+++ b/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc
@@ -13,7 +13,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousTest/CUDAWrapper/interface/DeviceAdditionWrapper.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
@@ -43,9 +43,9 @@ void CUDATestWrapperAdditionModule::analyze(edm::StreamID,
edm::Event const& event,
edm::EventSetup const& setup) const {
// require CUDA for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The CUDAService is disabled, the test will be skipped.\n";
+ edm::Service cuda;
+ if (not cuda or not cuda->enabled()) {
+ std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py b/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py
index b3e37de34a951..af5e9dfb5f708 100644
--- a/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py
+++ b/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestCUDATestWrapperAdditionModule')
+process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi')
process.source = cms.Source('EmptySource')
-process.CUDAService = cms.Service('CUDAService')
-
process.cudaTestWrapperAdditionModule = cms.EDAnalyzer('CUDATestWrapperAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc
index 7cb12d3b0ce70..bf46ae35da8bf 100644
--- a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc
+++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc
@@ -13,7 +13,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
#include "ROCmTestDeviceAdditionAlgo.h"
@@ -42,9 +42,9 @@ void ROCmTestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptio
void ROCmTestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require ROCm for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ edm::Service rocm;
+ if (not rocm or not rocm->enabled()) {
+ std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py b/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py
index 5e31e902452f0..1c93e667741d6 100644
--- a/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py
+++ b/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestROCmTestDeviceAdditionModule')
+process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi')
process.source = cms.Source('EmptySource')
-process.ROCmService = cms.Service('ROCmService')
-
process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc
index cab3415e4551d..c33e42e3c49b0 100644
--- a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc
+++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc
@@ -13,7 +13,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
#include "ROCmTestKernelAdditionAlgo.h"
@@ -42,9 +42,9 @@ void ROCmTestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptio
void ROCmTestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require ROCm for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ edm::Service rocm;
+ if (not rocm or not rocm->enabled()) {
+ std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py b/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py
index b05991338da3b..fb6b54564bbea 100644
--- a/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py
+++ b/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestROCmTestKernelAdditionModule')
+process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi')
process.source = cms.Source('EmptySource')
-process.ROCmService = cms.Service('ROCmService')
-
process.rocmTestKernelAdditionModule = cms.EDAnalyzer('ROCmTestKernelAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc
index 901b6eac51122..e3315fa0ff0e4 100644
--- a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc
+++ b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc
@@ -11,7 +11,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#include "HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h"
class ROCmTestOpaqueAdditionModule : public edm::global::EDAnalyzer<> {
@@ -38,9 +38,9 @@ void ROCmTestOpaqueAdditionModule::fillDescriptions(edm::ConfigurationDescriptio
void ROCmTestOpaqueAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require ROCm for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ edm::Service rocm;
+ if (not rocm or not rocm->enabled()) {
+ std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py b/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py
index 2ae6853a8e7ee..a2156489b7c21 100644
--- a/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py
+++ b/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestROCmTestOpaqueAdditionModule')
+process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi')
process.source = cms.Source('EmptySource')
-process.ROCmService = cms.Service('ROCmService')
-
process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py b/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py
index 05c4bf20d3f17..0d6f67183620d 100644
--- a/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py
+++ b/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestROCmTestOpaqueAdditionModule')
+process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi')
process.source = cms.Source('EmptySource')
-process.ROCmService = cms.Service('ROCmService')
-
process.rocmTestOpaqueAdditionModule = cms.EDAnalyzer('ROCmTestOpaqueAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc b/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc
index 48b2b9dc91a20..010e9f9dad618 100644
--- a/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc
+++ b/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc
@@ -13,7 +13,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
-#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#include "HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
@@ -43,9 +43,9 @@ void ROCmTestWrapperAdditionModule::analyze(edm::StreamID,
edm::Event const& event,
edm::EventSetup const& setup) const {
// require ROCm for running
- edm::Service cs;
- if (not cs->enabled()) {
- std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ edm::Service rocm;
+ if (not rocm or not rocm->enabled()) {
+ std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n";
return;
}
diff --git a/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py b/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py
index b493b484ed82a..56ca14ed90f8b 100644
--- a/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py
+++ b/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py
@@ -1,11 +1,10 @@
import FWCore.ParameterSet.Config as cms
process = cms.Process('TestROCmTestWrapperAdditionModule')
+process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi')
process.source = cms.Source('EmptySource')
-process.ROCmService = cms.Service('ROCmService')
-
process.rocmTestWrapperAdditionModule = cms.EDAnalyzer('ROCmTestWrapperAdditionModule',
size = cms.uint32( 1024*1024 )
)
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc
index c08d27c4ad196..9edf3ad0087b1 100644
--- a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc
+++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc
@@ -27,7 +27,7 @@
#include "FWCore/Utilities/interface/ESGetToken.h"
#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "EcalRecHitBuilderKernels.h"
diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc b/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc
index 4ef1d4530722d..d73b1fbb82777 100644
--- a/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc
+++ b/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc
@@ -22,7 +22,7 @@
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ContextState.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc
index 2322128fb09a3..01415c7834807 100644
--- a/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc
+++ b/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc
@@ -22,7 +22,7 @@
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ContextState.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc
index 16a252fb33e48..b26800a5aff52 100644
--- a/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc
+++ b/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc
@@ -22,7 +22,7 @@
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ContextState.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
diff --git a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc
index b598947423adf..193e5b8bba0f7 100644
--- a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc
+++ b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc
@@ -6,7 +6,7 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "SimpleAlgoGPU.h"
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc
index 6f83a8b414485..9e19c5ec5ff15 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc
@@ -34,7 +34,7 @@
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "RecoTracker/Record/interface/CkfComponentsRecord.h"
// local includes
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc
index 76cc641d365c5..fd6109958f8ef 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc
@@ -34,7 +34,7 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "RecoTracker/Record/interface/CkfComponentsRecord.h"
// local includes
diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc
index 97530a9567e22..6765703f35a73 100644
--- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc
+++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc
@@ -22,7 +22,7 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/EDMException.h"
#include "FWCore/Utilities/interface/isFinite.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "TrackingTools/DetLayers/interface/BarrelDetLayer.h"
#include "CAHitNtupletGeneratorOnGPU.h"
@@ -246,8 +246,8 @@ template
void CAHitNtupletGeneratorOnGPU::beginJob() {
if (m_params.onGPU_) {
// allocate pinned host memory only if CUDA is available
- edm::Service cs;
- if (cs and cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
cudaCheck(cudaMalloc(&m_counters, sizeof(Counters)));
cudaCheck(cudaMemset(m_counters, 0, sizeof(Counters)));
}
@@ -261,8 +261,8 @@ template
void CAHitNtupletGeneratorOnGPU::endJob() {
if (m_params.onGPU_) {
// print the gpu statistics and free pinned host memory only if CUDA is available
- edm::Service cs;
- if (cs and cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
if (m_params.doStats_) {
// crash on multi-gpu processes
CAHitNtupletGeneratorKernelsGPU::printCounters(m_counters);
diff --git a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc
index 8b0de1c739076..a62c6efb5abdb 100644
--- a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc
+++ b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc
@@ -12,7 +12,7 @@
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
+#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
@@ -48,8 +48,8 @@ class BeamSpotToCUDA : public edm::global::EDProducer beginStream(edm::StreamID) const override {
- edm::Service cs;
- if (cs->enabled()) {
+ edm::Service cuda;
+ if (cuda and cuda->enabled()) {
return std::make_unique();
} else {
return nullptr;