diff --git a/RecoParticleFlow/Configuration/test/run_HBHEandPF_onCPUandGPU.py b/RecoParticleFlow/Configuration/test/run_HBHEandPF_onCPUandGPU.py index 46f5d040bf175..f6664469eb45f 100644 --- a/RecoParticleFlow/Configuration/test/run_HBHEandPF_onCPUandGPU.py +++ b/RecoParticleFlow/Configuration/test/run_HBHEandPF_onCPUandGPU.py @@ -110,6 +110,9 @@ process.load("RecoParticleFlow.PFClusterProducer.pfhbheRecHitParamsGPUESProducer_cfi") process.load("RecoParticleFlow.PFClusterProducer.pfhbheTopologyGPUESProducer_cfi") +from RecoParticleFlow.PFClusterProducer.pfClusteringParamsGPUESSource_cfi import pfClusteringParamsGPUESSource as _pfClusteringParamsGPUESSource +process.pfClusteringParamsGPUESSource = _pfClusteringParamsGPUESSource.clone() + # Automatic addition of the customisation function from HLTrigger.Configuration.customizeHLTforPatatrack #from HLTrigger.Configuration.customizeHLTforPatatrack import customizeHLTforPatatrack, customiseCommon, customiseHcalLocalReconstruction diff --git a/RecoParticleFlow/Configuration/test/testPFOnGPUvsCPU_HBHE.sh b/RecoParticleFlow/Configuration/test/testPFOnGPUvsCPU_HBHE.sh new file mode 100755 index 0000000000000..38994be6c0457 --- /dev/null +++ b/RecoParticleFlow/Configuration/test/testPFOnGPUvsCPU_HBHE.sh @@ -0,0 +1,7 @@ +#!/bin/bash -e + +[ ! -z "${CMSSW_BASE}" ] || exit 1 + +cmsRun "${CMSSW_BASE}"/src/RecoParticleFlow/Configuration/test/run_HBHEandPF_onCPUandGPU.py +cmsRun "${CMSSW_BASE}"/src/Validation/RecoParticleFlow/test/run_DQM_forGPU_HLT.py +cmsRun "${CMSSW_BASE}"/src/Validation/RecoParticleFlow/test/run_HARVESTING_forGPU_HLT.py &> /dev/null diff --git a/RecoParticleFlow/PFClusterProducer/BuildFile.xml b/RecoParticleFlow/PFClusterProducer/BuildFile.xml index 06ef529ae273b..c2684b2034de4 100644 --- a/RecoParticleFlow/PFClusterProducer/BuildFile.xml +++ b/RecoParticleFlow/PFClusterProducer/BuildFile.xml @@ -15,6 +15,11 @@ + + + + + diff --git a/RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h b/RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h new file mode 100644 index 0000000000000..ce15c79b976b2 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h @@ -0,0 +1,84 @@ +#ifndef RecoParticleFlow_PFClusterProducer_PFClusteringParamsGPU_h +#define RecoParticleFlow_PFClusterProducer_PFClusteringParamsGPU_h + +#include + +#include "FWCore/Utilities/interface/propagate_const.h" +#include "FWCore/Utilities/interface/propagate_const_array.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" +#include "CUDADataFormats/Common/interface/PortableHostCollection.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif + +namespace edm { + class ParameterSet; + class ParameterSetDescription; +} + +class PFClusteringParamsGPU { +public: + GENERATE_SOA_LAYOUT(ProductSoALayout, + SOA_SCALAR(int32_t, nNeigh), + SOA_SCALAR(float, seedPt2ThresholdEB), + SOA_SCALAR(float, seedPt2ThresholdEE), + SOA_COLUMN(float, seedEThresholdEB_vec), + SOA_COLUMN(float, seedEThresholdEE_vec), + SOA_COLUMN(float, topoEThresholdEB_vec), + SOA_COLUMN(float, topoEThresholdEE_vec), + SOA_SCALAR(float, showerSigma2), + SOA_SCALAR(float, minFracToKeep), + SOA_SCALAR(float, minFracTot), + SOA_SCALAR(uint32_t, maxIterations), + SOA_SCALAR(bool, excludeOtherSeeds), + SOA_SCALAR(float, stoppingTolerance), + SOA_SCALAR(float, minFracInCalc), + SOA_SCALAR(float, minAllowedNormalization), + SOA_COLUMN(float, recHitEnergyNormInvEB_vec), + SOA_COLUMN(float, recHitEnergyNormInvEE_vec), + SOA_SCALAR(float, barrelTimeResConsts_corrTermLowE), + SOA_SCALAR(float, barrelTimeResConsts_threshLowE), + SOA_SCALAR(float, barrelTimeResConsts_noiseTerm), + SOA_SCALAR(float, barrelTimeResConsts_constantTermLowE2), + SOA_SCALAR(float, barrelTimeResConsts_noiseTermLowE), + SOA_SCALAR(float, barrelTimeResConsts_threshHighE), + SOA_SCALAR(float, barrelTimeResConsts_constantTerm2), + SOA_SCALAR(float, barrelTimeResConsts_resHighE2), + SOA_SCALAR(float, endcapTimeResConsts_corrTermLowE), + SOA_SCALAR(float, endcapTimeResConsts_threshLowE), + SOA_SCALAR(float, endcapTimeResConsts_noiseTerm), + SOA_SCALAR(float, endcapTimeResConsts_constantTermLowE2), + SOA_SCALAR(float, endcapTimeResConsts_noiseTermLowE), + SOA_SCALAR(float, endcapTimeResConsts_threshHighE), + SOA_SCALAR(float, endcapTimeResConsts_constantTerm2), + SOA_SCALAR(float, endcapTimeResConsts_resHighE2)) + + using HostProduct = cms::cuda::PortableHostCollection>; + using DeviceProduct = cms::cuda::PortableDeviceCollection>; + +#ifndef __CUDACC__ + PFClusteringParamsGPU(edm::ParameterSet const&); + ~PFClusteringParamsGPU() = default; + + static void fillDescription(edm::ParameterSetDescription& psetDesc); + + DeviceProduct const& getProduct(cudaStream_t) const; + +private: + constexpr static uint32_t kMaxDepth_barrel = 4; + constexpr static uint32_t kMaxDepth_endcap = 7; + + void setParameterValues(edm::ParameterSet const& iConfig); + + HostProduct params_; + cms::cuda::ESProduct product_; +#endif +}; + +#endif // RecoParticleFlow_PFClusterProducer_PFClusteringParamsGPU_h diff --git a/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml b/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml index 08ec67b03792c..c77de92af7f90 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml +++ b/RecoParticleFlow/PFClusterProducer/plugins/BuildFile.xml @@ -1,4 +1,11 @@ - + + + + + + + + diff --git a/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.cu b/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.cu index a863ad11c7cda..ecf545b6725a9 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.cu +++ b/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.cu @@ -15,19 +15,12 @@ using PFClustering::common::PFLayer; -// Uncomment for debugging -//#define DEBUG_GPU_HCAL - constexpr const float PI_F = 3.141592654f; -namespace PFClusterCudaHCAL { - __constant__ PFClustering::common::CudaHCALConstants constantsHCAL_d; - - __constant__ int nNT = 8; // Number of neighbors considered for topo clustering - - //int nTopoLoops = 100; - int nTopoLoops = 35; +// Number of neighbors considered for topo clustering +constexpr const int nNT = 8; +namespace PFClusterCudaHCAL { // // --- kernel summary -- // initializeCudaConstants @@ -68,56 +61,50 @@ namespace PFClusterCudaHCAL { // passingTopoThreshold // printRhfIndex - void initializeCudaConstants(const PFClustering::common::CudaHCALConstants& cudaConstants, - const cudaStream_t cudaStream) { - cudaCheck(cudaMemcpyToSymbolAsync( - constantsHCAL_d, &cudaConstants, sizeof(cudaConstants), 0, cudaMemcpyHostToDevice, cudaStream)); - } - - __device__ __forceinline__ float timeResolution2Endcap(const float energy) { + __device__ __forceinline__ float timeResolution2Endcap(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, const float energy) { float res2 = 10000.; if (energy <= 0.) return res2; - else if (energy < constantsHCAL_d.endcapTimeResConsts.threshLowE) { - if (constantsHCAL_d.endcapTimeResConsts.corrTermLowE > 0.) { // different parametrisation - const float res = constantsHCAL_d.endcapTimeResConsts.noiseTermLowE / energy + - constantsHCAL_d.endcapTimeResConsts.corrTermLowE / (energy * energy); + else if (energy < pfClusParams.endcapTimeResConsts_threshLowE()) { + if (pfClusParams.endcapTimeResConsts_corrTermLowE() > 0.) { // different parametrisation + const float res = pfClusParams.endcapTimeResConsts_noiseTermLowE() / energy + + pfClusParams.endcapTimeResConsts_corrTermLowE() / (energy * energy); res2 = res * res; } else { - const float noiseDivE = constantsHCAL_d.endcapTimeResConsts.noiseTermLowE / energy; - res2 = noiseDivE * noiseDivE + constantsHCAL_d.endcapTimeResConsts.constantTermLowE2; + const float noiseDivE = pfClusParams.endcapTimeResConsts_noiseTermLowE() / energy; + res2 = noiseDivE * noiseDivE + pfClusParams.endcapTimeResConsts_constantTermLowE2(); } - } else if (energy < constantsHCAL_d.endcapTimeResConsts.threshHighE) { - const float noiseDivE = constantsHCAL_d.endcapTimeResConsts.noiseTerm / energy; - res2 = noiseDivE * noiseDivE + constantsHCAL_d.endcapTimeResConsts.constantTerm2; + } else if (energy < pfClusParams.endcapTimeResConsts_threshHighE()) { + const float noiseDivE = pfClusParams.endcapTimeResConsts_noiseTerm() / energy; + res2 = noiseDivE * noiseDivE + pfClusParams.endcapTimeResConsts_constantTerm2(); } else // if (energy >=threshHighE_) - res2 = constantsHCAL_d.endcapTimeResConsts.resHighE2; + res2 = pfClusParams.endcapTimeResConsts_resHighE2(); if (res2 > 10000.) return 10000.; return res2; } - __device__ __forceinline__ float timeResolution2Barrel(const float energy) { + __device__ __forceinline__ float timeResolution2Barrel(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, const float energy) { float res2 = 10000.; if (energy <= 0.) return res2; - else if (energy < constantsHCAL_d.barrelTimeResConsts.threshLowE) { - if (constantsHCAL_d.barrelTimeResConsts.corrTermLowE > 0.) { // different parametrisation - const float res = constantsHCAL_d.barrelTimeResConsts.noiseTermLowE / energy + - constantsHCAL_d.barrelTimeResConsts.corrTermLowE / (energy * energy); + else if (energy < pfClusParams.barrelTimeResConsts_threshLowE()) { + if (pfClusParams.barrelTimeResConsts_corrTermLowE() > 0.) { // different parametrisation + const float res = pfClusParams.barrelTimeResConsts_noiseTermLowE() / energy + + pfClusParams.barrelTimeResConsts_corrTermLowE() / (energy * energy); res2 = res * res; } else { - const float noiseDivE = constantsHCAL_d.barrelTimeResConsts.noiseTermLowE / energy; - res2 = noiseDivE * noiseDivE + constantsHCAL_d.barrelTimeResConsts.constantTermLowE2; + const float noiseDivE = pfClusParams.barrelTimeResConsts_noiseTermLowE() / energy; + res2 = noiseDivE * noiseDivE + pfClusParams.barrelTimeResConsts_constantTermLowE2(); } - } else if (energy < constantsHCAL_d.barrelTimeResConsts.threshHighE) { - const float noiseDivE = constantsHCAL_d.barrelTimeResConsts.noiseTerm / energy; - res2 = noiseDivE * noiseDivE + constantsHCAL_d.barrelTimeResConsts.constantTerm2; + } else if (energy < pfClusParams.barrelTimeResConsts_threshHighE()) { + const float noiseDivE = pfClusParams.barrelTimeResConsts_noiseTerm() / energy; + res2 = noiseDivE * noiseDivE + pfClusParams.barrelTimeResConsts_constantTerm2(); } else // if (energy >=threshHighE_) - res2 = constantsHCAL_d.barrelTimeResConsts.resHighE2; + res2 = pfClusParams.barrelTimeResConsts_resHighE2(); if (res2 > 10000.) return 10000.; @@ -163,7 +150,8 @@ namespace PFClusterCudaHCAL { return __int_as_float(ret); } - __global__ void seedingTopoThreshKernel_HCAL(size_t size, + __global__ void seedingTopoThreshKernel_HCAL(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_energy, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, @@ -205,10 +193,10 @@ namespace PFClusterCudaHCAL { float pt2 = energy * energy * (pos.x * pos.x + pos.y * pos.y) / (pos.x * pos.x + pos.y * pos.y + pos.z * pos.z); // Seeding threshold test - if ((layer == PFLayer::HCAL_BARREL1 && energy > constantsHCAL_d.seedEThresholdEB_vec[depthOffset] && - pt2 > constantsHCAL_d.seedPt2ThresholdEB) || - (layer == PFLayer::HCAL_ENDCAP && energy > constantsHCAL_d.seedEThresholdEE_vec[depthOffset] && - pt2 > constantsHCAL_d.seedPt2ThresholdEE)) { + if ((layer == PFLayer::HCAL_BARREL1 && energy > pfClusParams.seedEThresholdEB_vec()[depthOffset] && + pt2 > pfClusParams.seedPt2ThresholdEB()) || + (layer == PFLayer::HCAL_ENDCAP && energy > pfClusParams.seedEThresholdEE_vec()[depthOffset] && + pt2 > pfClusParams.seedPt2ThresholdEE())) { pfrh_isSeed[i] = 1; for (int k = 0; k < 4; k++) { if (neigh4_Ind[8 * i + k] < 0) @@ -219,9 +207,9 @@ namespace PFClusterCudaHCAL { break; } } - // for(int k=0; k constantsHCAL_d.topoEThresholdEE_vec[depthOffset]) || - (layer == PFLayer::HCAL_BARREL1 && energy > constantsHCAL_d.topoEThresholdEB_vec[depthOffset])) { + if ((layer == PFLayer::HCAL_ENDCAP && energy > pfClusParams.topoEThresholdEE_vec()[depthOffset]) || + (layer == PFLayer::HCAL_BARREL1 && energy > pfClusParams.topoEThresholdEB_vec()[depthOffset])) { pfrh_passTopoThresh[i] = true; } //else { pfrh_passTopoThresh[i] = false; } @@ -244,7 +232,8 @@ namespace PFClusterCudaHCAL { } } } - __global__ void seedingKernel_HCAL(size_t size, + __global__ void seedingKernel_HCAL(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_energy, const float* __restrict__ pfrh_pt2, int* pfrh_isSeed, @@ -256,16 +245,16 @@ namespace PFClusterCudaHCAL { if (i < size) { if ((pfrh_layer[i] == PFLayer::HCAL_BARREL1 && - pfrh_energy[i] > constantsHCAL_d.seedEThresholdEB_vec[pfrh_depth[i] - 1] && - pfrh_pt2[i] > constantsHCAL_d.seedPt2ThresholdEB) || + pfrh_energy[i] > pfClusParams.seedEThresholdEB_vec()[pfrh_depth[i] - 1] && + pfrh_pt2[i] > pfClusParams.seedPt2ThresholdEB()) || (pfrh_layer[i] == PFLayer::HCAL_ENDCAP && - pfrh_energy[i] > constantsHCAL_d.seedEThresholdEE_vec[pfrh_depth[i] - 1] && - pfrh_pt2[i] > constantsHCAL_d.seedPt2ThresholdEE)) { + pfrh_energy[i] > pfClusParams.seedEThresholdEE_vec()[pfrh_depth[i] - 1] && + pfrh_pt2[i] > pfClusParams.seedPt2ThresholdEE())) { pfrh_isSeed[i] = 1; - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] < 0) + for (int k = 0; k < pfClusParams.nNeigh(); k++) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] < 0) continue; - if (pfrh_energy[i] < pfrh_energy[neigh4_Ind[constantsHCAL_d.nNeigh * i + k]]) { + if (pfrh_energy[i] < pfrh_energy[neigh4_Ind[pfClusParams.nNeigh() * i + k]]) { pfrh_isSeed[i] = 0; //pfrh_topoId[i]=-1; break; @@ -278,7 +267,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void seedingKernel_HCAL_serialize(size_t size, + __global__ void seedingKernel_HCAL_serialize(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_energy, const float* __restrict__ pfrh_pt2, int* pfrh_isSeed, @@ -290,16 +280,16 @@ namespace PFClusterCudaHCAL { for (int i = 0; i < size; i++) { if (i < size) { if ((pfrh_layer[i] == PFLayer::HCAL_BARREL1 && - pfrh_energy[i] > constantsHCAL_d.seedEThresholdEB_vec[pfrh_depth[i] - 1] && - pfrh_pt2[i] > constantsHCAL_d.seedPt2ThresholdEB) || + pfrh_energy[i] > pfClusParams.seedEThresholdEB_vec()[pfrh_depth[i] - 1] && + pfrh_pt2[i] > pfClusParams.seedPt2ThresholdEB()) || (pfrh_layer[i] == PFLayer::HCAL_ENDCAP && - pfrh_energy[i] > constantsHCAL_d.seedEThresholdEE_vec[pfrh_depth[i] - 1] && - pfrh_pt2[i] > constantsHCAL_d.seedPt2ThresholdEE)) { + pfrh_energy[i] > pfClusParams.seedEThresholdEE_vec()[pfrh_depth[i] - 1] && + pfrh_pt2[i] > pfClusParams.seedPt2ThresholdEE())) { pfrh_isSeed[i] = 1; - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] < 0) + for (int k = 0; k < pfClusParams.nNeigh(); k++) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] < 0) continue; - if (pfrh_energy[i] < pfrh_energy[neigh4_Ind[constantsHCAL_d.nNeigh * i + k]]) { + if (pfrh_energy[i] < pfrh_energy[neigh4_Ind[pfClusParams.nNeigh() * i + k]]) { pfrh_isSeed[i] = 0; //pfrh_topoId[i]=-1; break; @@ -335,7 +325,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void topoKernel_HCALV2(size_t size, + __global__ void topoKernel_HCALV2(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_energy, int* pfrh_topoId, const int* __restrict__ pfrh_layer, @@ -350,14 +341,14 @@ namespace PFClusterCudaHCAL { while (neigh8_Ind[nNT * l + k] > -1 && pfrh_topoId[l] != pfrh_topoId[neigh8_Ind[nNT * l + k]] && ((pfrh_layer[neigh8_Ind[nNT * l + k]] == PFLayer::HCAL_ENDCAP && pfrh_energy[neigh8_Ind[nNT * l + k]] > - constantsHCAL_d.topoEThresholdEE_vec[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1]) || + pfClusParams.topoEThresholdEE_vec()[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1]) || (pfrh_layer[neigh8_Ind[nNT * l + k]] == PFLayer::HCAL_BARREL1 && pfrh_energy[neigh8_Ind[nNT * l + k]] > - constantsHCAL_d.topoEThresholdEB_vec[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1])) && + pfClusParams.topoEThresholdEB_vec()[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1])) && ((pfrh_layer[l] == PFLayer::HCAL_ENDCAP && - pfrh_energy[l] > constantsHCAL_d.topoEThresholdEE_vec[pfrh_depth[l] - 1]) || + pfrh_energy[l] > pfClusParams.topoEThresholdEE_vec()[pfrh_depth[l] - 1]) || (pfrh_layer[l] == PFLayer::HCAL_BARREL1 && - pfrh_energy[l] > constantsHCAL_d.topoEThresholdEB_vec[pfrh_depth[l] - 1]))) { + pfrh_energy[l] > pfClusParams.topoEThresholdEB_vec()[pfrh_depth[l] - 1]))) { if (pfrh_topoId[l] > pfrh_topoId[neigh8_Ind[nNT * l + k]]) { atomicMax(&pfrh_topoId[neigh8_Ind[nNT * l + k]], pfrh_topoId[l]); } @@ -368,7 +359,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void topoKernel_HCAL_serialize(size_t size, + __global__ void topoKernel_HCAL_serialize(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_energy, int* pfrh_topoId, const int* __restrict__ pfrh_layer, @@ -383,14 +375,14 @@ namespace PFClusterCudaHCAL { while (neigh8_Ind[nNT * l + k] > -1 && pfrh_topoId[l] != pfrh_topoId[neigh8_Ind[nNT * l + k]] && ((pfrh_layer[neigh8_Ind[nNT * l + k]] == PFLayer::HCAL_ENDCAP && pfrh_energy[neigh8_Ind[nNT * l + k]] > - constantsHCAL_d.topoEThresholdEE_vec[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1]) || + pfClusParams.topoEThresholdEE_vec()[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1]) || (pfrh_layer[neigh8_Ind[nNT * l + k]] == PFLayer::HCAL_BARREL1 && pfrh_energy[neigh8_Ind[nNT * l + k]] > - constantsHCAL_d.topoEThresholdEB_vec[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1])) && + pfClusParams.topoEThresholdEB_vec()[pfrh_depth[neigh8_Ind[nNT * l + k]] - 1])) && ((pfrh_layer[l] == PFLayer::HCAL_ENDCAP && - pfrh_energy[l] > constantsHCAL_d.topoEThresholdEE_vec[pfrh_depth[l] - 1]) || + pfrh_energy[l] > pfClusParams.topoEThresholdEE_vec()[pfrh_depth[l] - 1]) || (pfrh_layer[l] == PFLayer::HCAL_BARREL1 && - pfrh_energy[l] > constantsHCAL_d.topoEThresholdEB_vec[pfrh_depth[l] - 1]))) { + pfrh_energy[l] > pfClusParams.topoEThresholdEB_vec()[pfrh_depth[l] - 1]))) { if (pfrh_topoId[l] > pfrh_topoId[neigh8_Ind[nNT * l + k]]) { atomicMax(&pfrh_topoId[neigh8_Ind[nNT * l + k]], pfrh_topoId[l]); } @@ -402,7 +394,8 @@ namespace PFClusterCudaHCAL { } } - __device__ void dev_hcalFastCluster_optimizedSimple(int topoId, + __device__ void dev_hcalFastCluster_optimizedSimple(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + int topoId, int nRHTopo, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, @@ -429,12 +422,12 @@ namespace PFClusterCudaHCAL { prevClusterPos = seedPos; seedEnergy = pfrh_energy[i]; clusterEnergy = seedEnergy; - tol = constantsHCAL_d.stoppingTolerance; // stopping tolerance * tolerance scaling + tol = pfClusParams.stoppingTolerance(); // stopping tolerance * tolerance scaling if (pfrh_layer[i] == PFLayer::HCAL_BARREL1) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[i] - 1]; else if (pfrh_layer[i] == PFLayer::HCAL_ENDCAP) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[i] - 1]; else { rhENormInv = 0.; printf("Rechit %d has invalid layer %d!\n", i, pfrh_layer[i]); @@ -471,11 +464,11 @@ namespace PFClusterCudaHCAL { (clusterPos.y - rhPos.y) * (clusterPos.y - rhPos.y) + (clusterPos.z - rhPos.z) * (clusterPos.z - rhPos.z); - d2 = dist2 / constantsHCAL_d.showerSigma2; + d2 = dist2 / pfClusParams.showerSigma2(); fraction = clusterEnergy * rhENormInv * expf(-0.5 * d2); // For single seed clusters, rechit fraction is either 1 (100%) or -1 (not included) - if (fraction > constantsHCAL_d.minFracTot && d2 < 100.) + if (fraction > pfClusParams.minFracTot() && d2 < 100.) fraction = 1.; else fraction = -1.; @@ -496,7 +489,7 @@ namespace PFClusterCudaHCAL { // Recalculate cluster position and energy if (fraction > -0.5) { atomicAdd(&clusterEnergy, rhEnergy); - //computeClusterPos(clusterPos, rechitPos, rhEnergy, rhENormInv, debug); + //computeClusterPos(pfClusParams, clusterPos, rechitPos, rhEnergy, rhENormInv, debug); atomicAdd(&clusterPos.x, rhPos.x * rhPosNorm); atomicAdd(&clusterPos.y, rhPos.y * rhPosNorm); atomicAdd(&clusterPos.z, rhPos.z * rhPosNorm); @@ -506,7 +499,7 @@ namespace PFClusterCudaHCAL { if (tid == 0) { // Normalize the seed postiion - if (clusterPos.w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos.w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos.x /= clusterPos.w; clusterPos.y /= clusterPos.w; @@ -524,7 +517,7 @@ namespace PFClusterCudaHCAL { printf("\tPF cluster (seed %d) position norm (%f) less than minimum (%f)\n", i, clusterPos.w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos.x = 0.; clusterPos.y = 0.; clusterPos.z = 0.; @@ -536,7 +529,7 @@ namespace PFClusterCudaHCAL { float diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -550,7 +543,8 @@ namespace PFClusterCudaHCAL { pfcIter[topoId] = iter; } - __device__ void dev_hcalFastCluster_optimizedComplex(int topoId, + __device__ void dev_hcalFastCluster_optimizedComplex(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + int topoId, int nSeeds, int nRHTopo, const float* __restrict__ pfrh_x, @@ -579,8 +573,7 @@ namespace PFClusterCudaHCAL { if (threadIdx.x == 0) { nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) topoSeedBegin = topoSeedOffsets[topoId]; - tol = constantsHCAL_d.stoppingTolerance * - powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // stopping tolerance * tolerance scaling + tol = pfClusParams.stoppingTolerance() * powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // stopping tolerance * tolerance scaling //gridStride = blockDim.x * gridDim.x; gridStride = blockDim.x; iter = 0; @@ -594,9 +587,9 @@ namespace PFClusterCudaHCAL { int i = topoSeedList[topoSeedBegin]; if (pfrh_layer[i] == PFLayer::HCAL_BARREL1) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[i] - 1]; else if (pfrh_layer[i] == PFLayer::HCAL_ENDCAP) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[i] - 1]; else printf("Rechit %d has invalid layer %d!\n", i, pfrh_layer[i]); } @@ -647,10 +640,11 @@ namespace PFClusterCudaHCAL { __syncthreads(); } - auto computeClusterPos = [&](float4& pos4, float frac, int rhInd, bool isDebug) { + auto computeClusterPos = [&](PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + float4& pos4, float frac, int rhInd, bool isDebug) { float4 rechitPos = make_float4(pfrh_x[rhInd], pfrh_y[rhInd], pfrh_z[rhInd], 1.0); const auto rh_energy = pfrh_energy[rhInd] * frac; - const auto norm = (frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); + const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, @@ -701,7 +695,7 @@ namespace PFClusterCudaHCAL { seedEnergy = pfrh_energy[seedThreadIdx]; // Compute initial cluster position shift for seed - computeClusterPos(seedInitClusterPos, 1., seedThreadIdx, debug); + computeClusterPos(pfClusParams, seedInitClusterPos, 1., seedThreadIdx, debug); } do { @@ -720,7 +714,7 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); //pcrhfrac[seedFracOffsets[seeds[s]]+tid+1] = fraction; @@ -736,13 +730,13 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); - if (rhFracSum[tid] > constantsHCAL_d.minFracTot) { + if (rhFracSum[tid] > pfClusParams.minFracTot()) { float fracpct = fraction / rhFracSum[tid]; //float fracpct = pcrhfrac[seedFracOffsets[i]+tid+1] / rhFracSum[tid]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { pcrhfrac[seedFracOffsets[i] + tid + 1] = fracpct; } else { pcrhfrac[seedFracOffsets[i] + tid + 1] = -1; @@ -785,7 +779,7 @@ namespace PFClusterCudaHCAL { if (nSeeds == 1 || j == seedNeighbors.x || j == seedNeighbors.y || j == seedNeighbors.z || j == seedNeighbors.w) - computeClusterPos(clusterPos[tid], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[tid], frac, j, debug); } } } @@ -793,7 +787,7 @@ namespace PFClusterCudaHCAL { // Position normalization if (tid < nSeeds) { - if (clusterPos[tid].w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos[tid].w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos[tid].x /= clusterPos[tid].w; clusterPos[tid].y /= clusterPos[tid].w; @@ -813,7 +807,7 @@ namespace PFClusterCudaHCAL { tid, seedThreadIdx, clusterPos[tid].w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos[tid].x = 0.0; clusterPos[tid].y = 0.0; clusterPos[tid].z = 0.0; @@ -833,7 +827,7 @@ namespace PFClusterCudaHCAL { if (tid == 0) { float diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -848,7 +842,8 @@ namespace PFClusterCudaHCAL { } // For clustering largest topos - __device__ void dev_hcalFastCluster_original(int topoId, + __device__ void dev_hcalFastCluster_original(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + int topoId, int nSeeds, int nRHTopo, const float* __restrict__ pfrh_x, @@ -874,7 +869,7 @@ namespace PFClusterCudaHCAL { if (threadIdx.x == 0) { nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) topoSeedBegin = topoSeedOffsets[topoId]; - tol = constantsHCAL_d.stoppingTolerance * + tol = pfClusParams.stoppingTolerance() * powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // stopping tolerance * tolerance scaling gridStride = blockDim.x; iter = 0; @@ -883,9 +878,9 @@ namespace PFClusterCudaHCAL { int i = topoSeedList[topoSeedBegin]; if (pfrh_layer[i] == PFLayer::HCAL_BARREL1) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[i] - 1]; else if (pfrh_layer[i] == PFLayer::HCAL_ENDCAP) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[i] - 1]; else printf("Rechit %d has invalid layer %d!\n", i, pfrh_layer[i]); } @@ -936,10 +931,11 @@ namespace PFClusterCudaHCAL { __syncthreads(); } - auto computeClusterPos = [&](float4& pos4, float frac, int rhInd, bool isDebug) { + auto computeClusterPos = [&](PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + float4& pos4, float frac, int rhInd, bool isDebug) { float4 rechitPos = make_float4(pfrh_x[rhInd], pfrh_y[rhInd], pfrh_z[rhInd], 1.0); const auto rh_energy = pfrh_energy[rhInd] * frac; - const auto norm = (frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); + const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); if (isDebug) printf("\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, @@ -986,7 +982,7 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); rhFracSum[tid] += fraction; @@ -1003,13 +999,13 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); - if (rhFracSum[tid] > constantsHCAL_d.minFracTot) { + if (rhFracSum[tid] > pfClusParams.minFracTot()) { float fracpct = fraction / rhFracSum[tid]; //float fracpct = pcrhfrac[seedFracOffsets[i]+tid+1] / rhFracSum[tid]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { pcrhfrac[seedFracOffsets[i] + tid + 1] = fracpct; } else { pcrhfrac[seedFracOffsets[i] + tid + 1] = -1; @@ -1055,7 +1051,7 @@ namespace PFClusterCudaHCAL { if (nSeeds == 1 || j == pfrh_neighbours[8 * seedRhIdx] || j == pfrh_neighbours[8 * seedRhIdx + 1] || j == pfrh_neighbours[8 * seedRhIdx + 2] || j == pfrh_neighbours[8 * seedRhIdx + 3]) - computeClusterPos(clusterPos[s], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[s], frac, j, debug); } } } @@ -1063,7 +1059,7 @@ namespace PFClusterCudaHCAL { // Position normalization for (int s = threadIdx.x; s < nSeeds; s += gridStride) { - if (clusterPos[s].w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos[s].x /= clusterPos[s].w; clusterPos[s].y /= clusterPos[s].w; @@ -1083,7 +1079,7 @@ namespace PFClusterCudaHCAL { s, seeds[s], clusterPos[s].w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos[s].x = 0.0; clusterPos[s].y = 0.0; clusterPos[s].z = 0.0; @@ -1103,7 +1099,7 @@ namespace PFClusterCudaHCAL { if (threadIdx.x == 0) { float diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -1117,7 +1113,8 @@ namespace PFClusterCudaHCAL { pfcIter[topoId] = iter; } - __global__ void hcalFastCluster_optimizedSimple(size_t nRH, + __global__ void hcalFastCluster_optimizedSimple(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t nRH, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -1148,7 +1145,7 @@ namespace PFClusterCudaHCAL { clusterPos = make_float4(pfrh_x[i], pfrh_y[i], pfrh_z[i], 1.); prevClusterPos = clusterPos; clusterEnergy = pfrh_energy[i]; - tol = constantsHCAL_d.stoppingTolerance; // stopping tolerance * tolerance scaling + tol = pfClusParams.stoppingTolerance(); // stopping tolerance * tolerance scaling iter = 0; notDone = true; debug = false; @@ -1167,9 +1164,9 @@ namespace PFClusterCudaHCAL { rhEnergy = pfrh_energy[j]; if (pfrh_layer[j] == PFLayer::HCAL_BARREL1) { - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1]; } else if (pfrh_layer[j] == PFLayer::HCAL_ENDCAP) { - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1]; } else { printf("Rechit %d has invalid layer %d!\n", j, pfrh_layer[j]); } @@ -1188,11 +1185,11 @@ namespace PFClusterCudaHCAL { (clusterPos.y - rhPos.y) * (clusterPos.y - rhPos.y) + (clusterPos.z - rhPos.z) * (clusterPos.z - rhPos.z); - d2 = dist2 / constantsHCAL_d.showerSigma2; + d2 = dist2 / pfClusParams.showerSigma2(); fraction = clusterEnergy * rhENormInv * expf(-0.5 * d2); // For single seed clusters, rechit fraction is either 1 (100%) or -1 (not included) - if (fraction > constantsHCAL_d.minFracTot && d2 < 100.) + if (fraction > pfClusParams.minFracTot() && d2 < 100.) fraction = 1.; else fraction = -1.; @@ -1213,7 +1210,7 @@ namespace PFClusterCudaHCAL { // Recalculate cluster position and energy if (fraction > -0.5) { atomicAdd(&clusterEnergy, rhEnergy); - //computeClusterPos(clusterPos, rechitPos, rhEnergy, rhENormInv, debug); + //computeClusterPos(pfClusParams, clusterPos, rechitPos, rhEnergy, rhENormInv, debug); atomicAdd(&clusterPos.x, rhPos.x * rhPosNorm); atomicAdd(&clusterPos.y, rhPos.y * rhPosNorm); atomicAdd(&clusterPos.z, rhPos.z * rhPosNorm); @@ -1223,7 +1220,7 @@ namespace PFClusterCudaHCAL { if (r == 0) { // Normalize the seed postiion - if (clusterPos.w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos.w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos.x /= clusterPos.w; clusterPos.y /= clusterPos.w; @@ -1241,7 +1238,7 @@ namespace PFClusterCudaHCAL { printf("\tPF cluster (seed %d) position norm (%f) less than minimum (%f)\n", i, clusterPos.w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos.x = 0.; clusterPos.y = 0.; clusterPos.z = 0.; @@ -1253,7 +1250,7 @@ namespace PFClusterCudaHCAL { diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -1272,7 +1269,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_optimizedComplex(size_t nRH, + __global__ void hcalFastCluster_optimizedComplex(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t nRH, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -1317,7 +1315,7 @@ namespace PFClusterCudaHCAL { nRHTopo = topoRHCount[topoId]; nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) topoSeedBegin = topoSeedOffsets[topoId]; - tol = constantsHCAL_d.stoppingTolerance * + tol = pfClusParams.stoppingTolerance() * powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // stopping tolerance * tolerance scaling //gridStride = blockDim.x * gridDim.x; gridStride = blockDim.x; @@ -1330,9 +1328,9 @@ namespace PFClusterCudaHCAL { int i = topoSeedList[topoSeedBegin]; if (pfrh_layer[i] == PFLayer::HCAL_BARREL1) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[i] - 1]; else if (pfrh_layer[i] == PFLayer::HCAL_ENDCAP) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[i] - 1]; else printf("Rechit %d has invalid layer %d!\n", i, pfrh_layer[i]); } @@ -1383,10 +1381,11 @@ namespace PFClusterCudaHCAL { __syncthreads(); } - auto computeClusterPos = [&](float4& pos4, float frac, int rhInd, bool isDebug) { + auto computeClusterPos = [&](PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + float4& pos4, float frac, int rhInd, bool isDebug) { float4 rechitPos = make_float4(pfrh_x[rhInd], pfrh_y[rhInd], pfrh_z[rhInd], 1.0); const auto rh_energy = pfrh_energy[rhInd] * frac; - const auto norm = (frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); + const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, @@ -1408,7 +1407,7 @@ namespace PFClusterCudaHCAL { const auto rh_energy = pfrh_energy[rhInd] * _frac; const auto norm = - (_frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); + (_frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, norm, _frac, rh_energy, rechitPos.x, rechitPos.y, rechitPos.z); @@ -1446,14 +1445,14 @@ namespace PFClusterCudaHCAL { float4 seedInitClusterPos = make_float4(0., 0., 0., 0.); if (tid < nSeeds) { seedThreadIdx = getSeedRhIdx(tid); - seedNeighbors = make_int4(neigh4_Ind[constantsHCAL_d.nNeigh * seedThreadIdx], - neigh4_Ind[constantsHCAL_d.nNeigh * seedThreadIdx + 1], - neigh4_Ind[constantsHCAL_d.nNeigh * seedThreadIdx + 2], - neigh4_Ind[constantsHCAL_d.nNeigh * seedThreadIdx + 3]); + seedNeighbors = make_int4(neigh4_Ind[pfClusParams.nNeigh() * seedThreadIdx], + neigh4_Ind[pfClusParams.nNeigh() * seedThreadIdx + 1], + neigh4_Ind[pfClusParams.nNeigh() * seedThreadIdx + 2], + neigh4_Ind[pfClusParams.nNeigh() * seedThreadIdx + 3]); seedEnergy = pfrh_energy[seedThreadIdx]; // Compute initial cluster position shift for seed - computeClusterPos(seedInitClusterPos, 1., seedThreadIdx, debug); + computeClusterPos(pfClusParams, seedInitClusterPos, 1., seedThreadIdx, debug); } do { @@ -1472,7 +1471,7 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); //pcrhfrac[seedFracOffsets[seeds[s]]+tid+1] = fraction; @@ -1488,13 +1487,13 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) + (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); - if (rhFracSum[tid] > constantsHCAL_d.minFracTot) { + if (rhFracSum[tid] > pfClusParams.minFracTot()) { float fracpct = fraction / rhFracSum[tid]; //float fracpct = pcrhfrac[seedFracOffsets[i]+tid+1] / rhFracSum[tid]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { pcrhfrac[seedFracOffsets[i] + tid + 1] = fracpct; } else { pcrhfrac[seedFracOffsets[i] + tid + 1] = -1; @@ -1538,7 +1537,7 @@ namespace PFClusterCudaHCAL { if (nSeeds == 1 || j == seedNeighbors.x || j == seedNeighbors.y || j == seedNeighbors.z || j == seedNeighbors.w) - computeClusterPos(clusterPos[tid], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[tid], frac, j, debug); } } } @@ -1546,7 +1545,7 @@ namespace PFClusterCudaHCAL { // Position normalization if (tid < nSeeds) { - if (clusterPos[tid].w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos[tid].w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos[tid].x /= clusterPos[tid].w; clusterPos[tid].y /= clusterPos[tid].w; @@ -1566,7 +1565,7 @@ namespace PFClusterCudaHCAL { tid, seedThreadIdx, clusterPos[tid].w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos[tid].x = 0.0; clusterPos[tid].y = 0.0; clusterPos[tid].z = 0.0; @@ -1592,7 +1591,7 @@ namespace PFClusterCudaHCAL { if (tid == 0) { float diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -1611,7 +1610,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_sharedRHList(size_t nRH, + __global__ void hcalFastCluster_sharedRHList(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t nRH, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -1655,7 +1655,7 @@ namespace PFClusterCudaHCAL { nRHTopo = topoRHCount[topoId]; nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) topoSeedBegin = topoSeedOffsets[topoId]; - tol = constantsHCAL_d.stoppingTolerance * + tol = pfClusParams.stoppingTolerance() * powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // stopping tolerance * tolerance scaling //gridStride = blockDim.x * gridDim.x; gridStride = blockDim.x; @@ -1669,9 +1669,9 @@ namespace PFClusterCudaHCAL { int i = topoSeedList[topoSeedBegin]; if (pfrh_layer[i] == PFLayer::HCAL_BARREL1) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[i] - 1]; else if (pfrh_layer[i] == PFLayer::HCAL_ENDCAP) - rhENormInv = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[i] - 1]; + rhENormInv = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[i] - 1]; else printf("Rechit %d has invalid layer %d!\n", i, pfrh_layer[i]); } @@ -1722,10 +1722,11 @@ namespace PFClusterCudaHCAL { __syncthreads(); } - auto computeClusterPos = [&](float4& pos4, float frac, int rhInd, bool isDebug) { + auto computeClusterPos = [&](PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + float4& pos4, float frac, int rhInd, bool isDebug) { float4 rechitPos = make_float4(pfrh_x[rhInd], pfrh_y[rhInd], pfrh_z[rhInd], 1.0); const auto rh_energy = pfrh_energy[rhInd] * frac; - const auto norm = (frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); + const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, @@ -1747,7 +1748,7 @@ namespace PFClusterCudaHCAL { const auto rh_energy = pfrh_energy[rhInd] * _frac; const auto norm = - (_frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); + (_frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * rhENormInv))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, norm, _frac, rh_energy, rechitPos.x, rechitPos.y, rechitPos.z); @@ -1805,7 +1806,7 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - pfrh_y[j]) * (clusterPos[s].y - pfrh_y[j]) + (clusterPos[s].z - pfrh_z[j]) * (clusterPos[s].z - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); fracSum[j] += fraction; @@ -1826,13 +1827,13 @@ namespace PFClusterCudaHCAL { (clusterPos[s].y - pfrh_y[j]) * (clusterPos[s].y - pfrh_y[j]) + (clusterPos[s].z - pfrh_z[j]) * (clusterPos[s].z - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5 * d2); //if(fraction < 0.) printf("FRACTION is NEGATIVE!!!"); - if (fracSum[j] > constantsHCAL_d.minFracTot) { + if (fracSum[j] > pfClusParams.minFracTot()) { float fracpct = fraction / fracSum[j]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { pcrhfrac[seedFracOffsets[i] + r] = fracpct; } else { pcrhfrac[seedFracOffsets[i] + r] = -1; @@ -1861,14 +1862,14 @@ namespace PFClusterCudaHCAL { int i = getSeedRhIdx(s); // Seed index // Seed rechit first clusterEnergy[s] += pfrh_energy[i]; - computeClusterPos(clusterPos[s], 1., i, debug); + computeClusterPos(pfClusParams, clusterPos[s], 1., i, debug); for (int r = 1; r < nRHNotSeed; r++) { // Rechits if (debug) { printf("\tNow on seed %d\t\tneigh4Ind = [", i); - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { + for (int k = 0; k < pfClusParams.nNeigh(); k++) { if (k != 0) printf(", "); - printf("%d", neigh4_Ind[constantsHCAL_d.nNeigh * i + k]); + printf("%d", neigh4_Ind[pfClusParams.nNeigh() * i + k]); } printf("]\n"); } @@ -1893,10 +1894,10 @@ namespace PFClusterCudaHCAL { updateClusterPos = true; } else { // Check if this is one of the neighboring rechits - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] < 0) + for (int k = 0; k < pfClusParams.nNeigh(); k++) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] < 0) continue; - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] == j) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] == j) { // Found it if (debug) printf("\t\tRechit %d is one of the 4 neighbors of seed %d\n", j, i); @@ -1907,7 +1908,7 @@ namespace PFClusterCudaHCAL { } } if (updateClusterPos) - computeClusterPos(clusterPos[s], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[s], frac, j, debug); } } // rechit loop } // seed loop @@ -1916,7 +1917,7 @@ namespace PFClusterCudaHCAL { // Normalize the seed postiions for (int s = threadIdx.x; s < nSeeds; s += gridStride) { //int i = getSeedRhIdx(s); // Seed index - if (clusterPos[s].w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos[s].x /= clusterPos[s].w; clusterPos[s].y /= clusterPos[s].w; @@ -1936,7 +1937,7 @@ namespace PFClusterCudaHCAL { s, getSeedRhIdx(s), clusterPos[s].w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos[s].x = 0.0; clusterPos[s].y = 0.0; clusterPos[s].z = 0.0; @@ -1968,7 +1969,7 @@ namespace PFClusterCudaHCAL { if (threadIdx.x == 0) { diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -1993,7 +1994,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_original(size_t nRH, + __global__ void hcalFastCluster_original(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t nRH, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2036,7 +2038,7 @@ namespace PFClusterCudaHCAL { nRHTopo = topoRHCount[topoId]; nRHNotSeed = nRHTopo - nSeeds + 1; // 1 + (# rechits per topoId that are NOT seeds) topoSeedBegin = topoSeedOffsets[topoId]; - tol = constantsHCAL_d.stoppingTolerance * + tol = pfClusParams.stoppingTolerance() * powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // stopping tolerance * tolerance scaling //gridStride = blockDim.x * gridDim.x; gridStride = blockDim.x; @@ -2090,18 +2092,19 @@ namespace PFClusterCudaHCAL { __syncthreads(); } - auto computeClusterPos = [&](float4& pos4, float _frac, int rhInd, bool isDebug) { + auto computeClusterPos = [&](PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + float4& pos4, float _frac, int rhInd, bool isDebug) { float4 rechitPos = make_float4(pfrh_x[rhInd], pfrh_y[rhInd], pfrh_z[rhInd], 1.0); float threshold = 0.0; if (pfrh_layer[rhInd] == PFLayer::HCAL_BARREL1) { threshold = - constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[rhInd] - 1]; // This number needs to be inverted + pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[rhInd] - 1]; // This number needs to be inverted } else if (pfrh_layer[rhInd] == PFLayer::HCAL_ENDCAP) { - threshold = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[rhInd] - 1]; + threshold = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[rhInd] - 1]; } const auto rh_energy = pfrh_energy[rhInd] * _frac; - const auto norm = (_frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * threshold))); + const auto norm = (_frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * threshold))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, @@ -2167,15 +2170,15 @@ namespace PFClusterCudaHCAL { (clusterPos[i].y - pfrh_y[j]) * (clusterPos[i].y - pfrh_y[j]) + (clusterPos[i].z - pfrh_z[j]) * (clusterPos[i].z - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == PFLayer::HCAL_BARREL1) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == PFLayer::HCAL_ENDCAP) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); @@ -2197,22 +2200,22 @@ namespace PFClusterCudaHCAL { (clusterPos[i].y - pfrh_y[j]) * (clusterPos[i].y - pfrh_y[j]) + (clusterPos[i].z - pfrh_z[j]) * (clusterPos[i].z - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == PFLayer::HCAL_BARREL1) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == PFLayer::HCAL_ENDCAP) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); - if (fracSum[j] > constantsHCAL_d.minFracTot) { + if (fracSum[j] > pfClusParams.minFracTot()) { float fracpct = fraction / fracSum[j]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { pcrhfrac[seedFracOffsets[i] + r] = fracpct; } else { pcrhfrac[seedFracOffsets[i] + r] = -1; @@ -2243,10 +2246,10 @@ namespace PFClusterCudaHCAL { if (debug) { printf("\tNow on seed %d\t\tneigh4Ind = [", i); - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { + for (int k = 0; k < pfClusParams.nNeigh(); k++) { if (k != 0) printf(", "); - printf("%d", neigh4_Ind[constantsHCAL_d.nNeigh * i + k]); + printf("%d", neigh4_Ind[pfClusParams.nNeigh() * i + k]); } printf("]\n"); } @@ -2265,23 +2268,23 @@ namespace PFClusterCudaHCAL { if (nSeeds == 1) { if (debug) printf("\t\tThis topo cluster has a single seed.\n"); - //computeClusterPos(clusterPos[i], frac, j, debug); + //computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); updateClusterPos = true; } else { if (j == i) { // This is the seed - //computeClusterPos(clusterPos[i], frac, j, debug); + //computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); updateClusterPos = true; } else { // Check if this is one of the neighboring rechits - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] < 0) + for (int k = 0; k < pfClusParams.nNeigh(); k++) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] < 0) continue; - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] == j) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] == j) { // Found it if (debug) printf("\t\tRechit %d is one of the 4 neighbors of seed %d\n", j, i); - //computeClusterPos(clusterPos[i], frac, j, debug); + //computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); updateClusterPos = true; break; } @@ -2289,7 +2292,7 @@ namespace PFClusterCudaHCAL { } } if (updateClusterPos) - computeClusterPos(clusterPos[i], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); } //else if (debug) // printf("Can't find rechit fraction for cluster %d (seed %d) rechit %d!\n", s, i, j); @@ -2300,7 +2303,7 @@ namespace PFClusterCudaHCAL { // Normalize the seed postiions for (int s = threadIdx.x; s < nSeeds; s += gridStride) { int i = getSeedRhIdx(s); // Seed index - if (clusterPos[i].w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos[i].w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos[i].x /= clusterPos[i].w; clusterPos[i].y /= clusterPos[i].w; @@ -2320,7 +2323,7 @@ namespace PFClusterCudaHCAL { s, i, clusterPos[i].w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos[i].x = 0.0; clusterPos[i].y = 0.0; clusterPos[i].z = 0.0; @@ -2352,7 +2355,7 @@ namespace PFClusterCudaHCAL { if (threadIdx.x == 0) { diff = sqrtf(diff2); iter++; - notDone = (diff > tol) && (iter < constantsHCAL_d.maxIterations); + notDone = (diff > tol) && (iter < pfClusParams.maxIterations()); if (debug) { if (diff > tol) printf("\tTopoId %d has diff = %f greater than tolerance %f (continuing)\n", topoId, diff, tol); @@ -2377,7 +2380,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_selection(size_t nRH, + __global__ void hcalFastCluster_selection(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t nRH, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2416,7 +2420,8 @@ namespace PFClusterCudaHCAL { pfcIter[topoId] = 0; } else if (nSeeds == 1) { // Single seed cluster - dev_hcalFastCluster_optimizedSimple(topoId, + dev_hcalFastCluster_optimizedSimple(pfClusParams, + topoId, nRHTopo, pfrh_x, pfrh_y, @@ -2431,7 +2436,8 @@ namespace PFClusterCudaHCAL { seedFracOffsets, pfcIter); } else if (nSeeds <= 100 && nRHTopo - nSeeds < 256) { - dev_hcalFastCluster_optimizedComplex(topoId, + dev_hcalFastCluster_optimizedComplex(pfClusParams, + topoId, nSeeds, nRHTopo, pfrh_x, @@ -2449,7 +2455,8 @@ namespace PFClusterCudaHCAL { pfcIter); //dev_hcalFastCluster_original(topoId, nSeeds, nRHTopo, pfrh_x, pfrh_y, pfrh_z, pfrh_energy, pfrh_layer, pfrh_depth, pfrh_neighbours, pcrhfrac, pcrhfracind, seedFracOffsets, topoSeedOffsets, topoSeedList, pfcIter); } else if (nSeeds <= 400 && (nRHTopo - nSeeds <= 1500)) { - dev_hcalFastCluster_original(topoId, + dev_hcalFastCluster_original(pfClusParams, + topoId, nSeeds, nRHTopo, pfrh_x, @@ -2472,7 +2479,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_serialize(size_t nRH, + __global__ void hcalFastCluster_serialize(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t nRH, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2546,18 +2554,19 @@ namespace PFClusterCudaHCAL { float tolScaling = powf(fmaxf(1.0, nSeeds - 1.0), 2.0); // Tolerance scaling - auto computeClusterPos = [&](float4& pos4, float _frac, int rhInd, bool isDebug) { + auto computeClusterPos = [&](PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + float4& pos4, float _frac, int rhInd, bool isDebug) { float4 rechitPos = make_float4(pfrh_x[rhInd], pfrh_y[rhInd], pfrh_z[rhInd], 1.0); float threshold = 0.0; if (pfrh_layer[rhInd] == PFLayer::HCAL_BARREL1) { threshold = - constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[rhInd] - 1]; // This number needs to be inverted + pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[rhInd] - 1]; // This number needs to be inverted } else if (pfrh_layer[rhInd] == PFLayer::HCAL_ENDCAP) { - threshold = constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[rhInd] - 1]; + threshold = pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[rhInd] - 1]; } const auto rh_energy = pfrh_energy[rhInd] * _frac; - const auto norm = (_frac < constantsHCAL_d.minFracInCalc ? 0.0f : max(0.0f, logf(rh_energy * threshold))); + const auto norm = (_frac < pfClusParams.minFracInCalc() ? 0.0f : max(0.0f, logf(rh_energy * threshold))); if (isDebug) printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n", rhInd, @@ -2575,7 +2584,7 @@ namespace PFClusterCudaHCAL { }; float diff = -1.0; - while (iter < constantsHCAL_d.maxIterations) { + while (iter < pfClusParams.maxIterations()) { if (debug) { printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId); } @@ -2619,15 +2628,15 @@ namespace PFClusterCudaHCAL { (clusterPos[i].y - pfrh_y[j]) * (clusterPos[i].y - pfrh_y[j]) + (clusterPos[i].z - pfrh_z[j]) * (clusterPos[i].z - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == PFLayer::HCAL_BARREL1) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == PFLayer::HCAL_ENDCAP) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); @@ -2648,22 +2657,22 @@ namespace PFClusterCudaHCAL { (clusterPos[i].y - pfrh_y[j]) * (clusterPos[i].y - pfrh_y[j]) + (clusterPos[i].z - pfrh_z[j]) * (clusterPos[i].z - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == PFLayer::HCAL_BARREL1) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == PFLayer::HCAL_ENDCAP) { fraction = - clusterEnergy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + clusterEnergy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); - if (fracSum[j] > constantsHCAL_d.minFracTot) { + if (fracSum[j] > pfClusParams.minFracTot()) { float fracpct = fraction / fracSum[j]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { pcrhfrac[seedFracOffsets[i] + r] = fracpct; } else { pcrhfrac[seedFracOffsets[i] + r] = -1; @@ -2683,10 +2692,10 @@ namespace PFClusterCudaHCAL { if (debug) { printf("\tNow on seed %d\t\tneigh4Ind = [", i); - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { + for (int k = 0; k < pfClusParams.nNeigh(); k++) { if (k != 0) printf(", "); - printf("%d", neigh4_Ind[constantsHCAL_d.nNeigh * i + k]); + printf("%d", neigh4_Ind[pfClusParams.nNeigh() * i + k]); } printf("]\n"); } @@ -2707,21 +2716,21 @@ namespace PFClusterCudaHCAL { if (nSeeds == 1) { if (debug) printf("\t\tThis topo cluster has a single seed.\n"); - computeClusterPos(clusterPos[i], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); } else { if (j == i) { // This is the seed - computeClusterPos(clusterPos[i], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); } else { // Check if this is one of the neighboring rechits - for (int k = 0; k < constantsHCAL_d.nNeigh; k++) { - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] < 0) + for (int k = 0; k < pfClusParams.nNeigh(); k++) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] < 0) continue; - if (neigh4_Ind[constantsHCAL_d.nNeigh * i + k] == j) { + if (neigh4_Ind[pfClusParams.nNeigh() * i + k] == j) { // Found it if (debug) printf("\t\tRechit %d is one of the 4 neighbors of seed %d\n", j, i); - computeClusterPos(clusterPos[i], frac, j, debug); + computeClusterPos(pfClusParams, clusterPos[i], frac, j, debug); } } } @@ -2730,7 +2739,7 @@ namespace PFClusterCudaHCAL { //else if (debug) // printf("Can't find rechit fraction for cluster %d (seed %d) rechit %d!\n", s, i, j); } - if (clusterPos[i].w >= constantsHCAL_d.minAllowedNormalization) { + if (clusterPos[i].w >= pfClusParams.minAllowedNormalization()) { // Divide by position norm clusterPos[i].x /= clusterPos[i].w; clusterPos[i].y /= clusterPos[i].w; @@ -2750,7 +2759,7 @@ namespace PFClusterCudaHCAL { s, i, clusterPos[i].w, - constantsHCAL_d.minAllowedNormalization); + pfClusParams.minAllowedNormalization()); clusterPos[i].x = 0.0; clusterPos[i].y = 0.0; clusterPos[i].z = 0.0; @@ -2773,8 +2782,8 @@ namespace PFClusterCudaHCAL { diff = sqrtf(diff2); iter++; - //if (iter >= constantsHCAL_d.maxIterations || diff2 <= constantsHCAL_d.stoppingTolerance2 * tolScaling2) break; - if (diff <= constantsHCAL_d.stoppingTolerance * tolScaling) { + //if (iter >= pfClusParams.maxIterations() || diff2 <= pfClusParams.stoppingTolerance()2 * tolScaling2) break; + if (diff <= pfClusParams.stoppingTolerance() * tolScaling) { if (debug) printf("\tTopoId %d has diff = %f LESS than tolerance (terminating!)\n", topoId, diff); break; @@ -2783,23 +2792,24 @@ namespace PFClusterCudaHCAL { } } if (iter > 1) { - if (iter >= constantsHCAL_d.maxIterations) + if (iter >= pfClusParams.maxIterations()) printf( - "topoId %d (nSeeds = %d nRHTopo = %d) hit constantsHCAL_d.maxIterations (%d) with diff (%f) > tol " + "topoId %d (nSeeds = %d nRHTopo = %d) hit pfClusParams.maxIterations() (%d) with diff (%f) > tol " "(%f)\n", topoId, nSeeds, nRHTopo, iter, diff, - constantsHCAL_d.stoppingTolerance * tolScaling); + pfClusParams.stoppingTolerance() * tolScaling); else printf("topoId %d converged in %d iterations\n", topoId, iter); } } } - __global__ void hcalFastCluster_step1(size_t size, + __global__ void hcalFastCluster_step1(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2821,13 +2831,13 @@ namespace PFClusterCudaHCAL { (pfrh_y[i] - pfrh_y[j]) * (pfrh_y[i] - pfrh_y[j]) + (pfrh_z[i] - pfrh_z[j]) * (pfrh_z[i] - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == PFLayer::HCAL_BARREL1) { - fraction = pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + fraction = pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == PFLayer::HCAL_ENDCAP) { - fraction = pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + fraction = pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) @@ -2840,7 +2850,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_step2(size_t size, + __global__ void hcalFastCluster_step2(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2872,21 +2883,21 @@ namespace PFClusterCudaHCAL { (pfrh_y[i] - pfrh_y[j]) * (pfrh_y[i] - pfrh_y[j]) + (pfrh_z[i] - pfrh_z[j]) * (pfrh_z[i] - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == 1) { - fraction = pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + fraction = pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == 3) { - fraction = pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + fraction = pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); - if (fracSum[j] > constantsHCAL_d.minFracTot) { + if (fracSum[j] > pfClusParams.minFracTot()) { float fracpct = fraction / fracSum[j]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { int k = atomicAdd(&rhCount[i], 1); pcrhfrac[seedFracOffsets[i] + k] = fracpct; pcrhfracind[seedFracOffsets[i] + k] = j; @@ -2897,7 +2908,7 @@ namespace PFClusterCudaHCAL { /* if(d2 < 100. ) { - if ((fraction/fracSum[j])>constantsHCAL_d.minFracToKeep){ + if ((fraction/fracSum[j])>pfClusParams.minFracToKeep()){ int k = atomicAdd(&rhCount[i],1); pcrhfrac[i*maxSize+k] = fraction/fracSum[j]; pcrhfracind[i*maxSize+k] = j; @@ -2910,7 +2921,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_step2(size_t size, + __global__ void hcalFastCluster_step2(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2937,21 +2949,21 @@ namespace PFClusterCudaHCAL { (pfrh_y[i] - pfrh_y[j]) * (pfrh_y[i] - pfrh_y[j]) + (pfrh_z[i] - pfrh_z[j]) * (pfrh_z[i] - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == 1) { - fraction = pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + fraction = pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == 3) { - fraction = pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + fraction = pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); - if (fracSum[j] > constantsHCAL_d.minFracTot) { + if (fracSum[j] > pfClusParams.minFracTot()) { float fracpct = fraction / fracSum[j]; - if (fracpct > 0.9999 || (d2 < 100. && fracpct > constantsHCAL_d.minFracToKeep)) { + if (fracpct > 0.9999 || (d2 < 100. && fracpct > pfClusParams.minFracToKeep())) { int k = atomicAdd(&rhCount[i], 1); pcrhfrac[i * 100 + k] = fracpct; pcrhfracind[i * 100 + k] = j; @@ -2960,7 +2972,7 @@ namespace PFClusterCudaHCAL { /* if(d2 < 100. ) { - if ((fraction/fracSum[j])>constantsHCAL_d.minFracToKeep){ + if ((fraction/fracSum[j])>pfClusParams.minFracToKeep()){ int k = atomicAdd(&rhCount[i],1); pcrhfrac[i*maxSize+k] = fraction/fracSum[j]; pcrhfracind[i*maxSize+k] = j; @@ -2973,7 +2985,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_step1_serialize(size_t size, + __global__ void hcalFastCluster_step1_serialize(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -2997,15 +3010,15 @@ namespace PFClusterCudaHCAL { (pfrh_y[i] - pfrh_y[j]) * (pfrh_y[i] - pfrh_y[j]) + (pfrh_z[i] - pfrh_z[j]) * (pfrh_z[i] - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == 1) { fraction = - pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == 3) { fraction = - pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) @@ -3020,7 +3033,8 @@ namespace PFClusterCudaHCAL { } } - __global__ void hcalFastCluster_step2_serialize(size_t size, + __global__ void hcalFastCluster_step2_serialize(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, const float* __restrict__ pfrh_z, @@ -3049,21 +3063,21 @@ namespace PFClusterCudaHCAL { (pfrh_y[i] - pfrh_y[j]) * (pfrh_y[i] - pfrh_y[j]) + (pfrh_z[i] - pfrh_z[j]) * (pfrh_z[i] - pfrh_z[j]); - float d2 = dist2 / constantsHCAL_d.showerSigma2; + float d2 = dist2 / pfClusParams.showerSigma2(); float fraction = -1.; if (pfrh_layer[j] == 1) { fraction = - pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEB_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEB_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } else if (pfrh_layer[j] == 3) { fraction = - pfrh_energy[i] * constantsHCAL_d.recHitEnergyNormInvEE_vec[pfrh_depth[j] - 1] * expf(-0.5 * d2); + pfrh_energy[i] * pfClusParams.recHitEnergyNormInvEE_vec()[pfrh_depth[j] - 1] * expf(-0.5 * d2); } if (fraction == -1.) printf("FRACTION is NEGATIVE!!!"); if (d2 < 100.) { - if ((fraction / fracSum[j]) > constantsHCAL_d.minFracToKeep) { + if ((fraction / fracSum[j]) > pfClusParams.minFracToKeep()) { int k = atomicAdd(&rhCount[i], 1); pcrhfrac[i * 100 + k] = fraction / fracSum[j]; pcrhfracind[i * 100 + k] = j; @@ -3078,15 +3092,16 @@ namespace PFClusterCudaHCAL { } // Compute whether rechits pass topo clustering energy threshold - __global__ void passingTopoThreshold(size_t size, + __global__ void passingTopoThreshold(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + size_t size, const int* __restrict__ pfrh_layer, const int* __restrict__ pfrh_depth, const float* __restrict__ pfrh_energy, bool* pfrh_passTopoThresh) { int i = threadIdx.x + blockIdx.x * blockDim.x; if (i < size) { - if ((pfrh_layer[i] == 3 && pfrh_energy[i] > constantsHCAL_d.topoEThresholdEE_vec[pfrh_depth[i] - 1]) || - (pfrh_layer[i] == 1 && pfrh_energy[i] > constantsHCAL_d.topoEThresholdEB_vec[pfrh_depth[i] - 1])) { + if ((pfrh_layer[i] == 3 && pfrh_energy[i] > pfClusParams.topoEThresholdEE_vec()[pfrh_depth[i] - 1]) || + (pfrh_layer[i] == 1 && pfrh_energy[i] > pfClusParams.topoEThresholdEB_vec()[pfrh_depth[i] - 1])) { pfrh_passTopoThresh[i] = true; } else { pfrh_passTopoThresh[i] = false; @@ -3094,15 +3109,16 @@ namespace PFClusterCudaHCAL { } } - __global__ void passingTopoThreshold(int size, + __global__ void passingTopoThreshold(PFClusteringParamsGPU::DeviceProduct::ConstView pfClusParams, + int size, const int* __restrict__ pfrh_layer, const int* __restrict__ pfrh_depth, const float* __restrict__ pfrh_energy, bool* pfrh_passTopoThresh) { int i = threadIdx.x + blockIdx.x * blockDim.x; if (i < size) { - if ((pfrh_layer[i] == 3 && pfrh_energy[i] > constantsHCAL_d.topoEThresholdEE_vec[pfrh_depth[i] - 1]) || - (pfrh_layer[i] == 1 && pfrh_energy[i] > constantsHCAL_d.topoEThresholdEB_vec[pfrh_depth[i] - 1])) { + if ((pfrh_layer[i] == 3 && pfrh_energy[i] > pfClusParams.topoEThresholdEE_vec()[pfrh_depth[i] - 1]) || + (pfrh_layer[i] == 1 && pfrh_energy[i] > pfClusParams.topoEThresholdEB_vec()[pfrh_depth[i] - 1])) { pfrh_passTopoThresh[i] = true; } else { pfrh_passTopoThresh[i] = false; @@ -4123,6 +4139,7 @@ namespace PFClusterCudaHCAL { void PFRechitToPFCluster_HCAL_entryPoint( cudaStream_t cudaStream, + PFClusteringParamsGPU::DeviceProduct const& pfClusParams, int nEdges, ::hcal::PFRecHitCollection<::pf::common::DevStoragePolicy> const& inputPFRecHits, ::PFClustering::HCAL::OutputDataGPU& outputGPU, @@ -4133,6 +4150,7 @@ namespace PFClusterCudaHCAL { // Combined seeding & topo clustering thresholds, array initialization seedingTopoThreshKernel_HCAL<<<(nRH + threadsPerBlock - 1) / threadsPerBlock, threadsPerBlock, 0, cudaStream>>>( + pfClusParams.const_view(), nRH, inputPFRecHits.pfrh_energy.get(), inputPFRecHits.pfrh_x.get(), @@ -4199,7 +4217,8 @@ namespace PFClusterCudaHCAL { scratchGPU.rhcount.get(), outputGPU.pcrh_fracInd.get()); - hcalFastCluster_selection<<>>(nRH, + hcalFastCluster_selection<<>>(pfClusParams.const_view(), + nRH, inputPFRecHits.pfrh_x.get(), inputPFRecHits.pfrh_y.get(), inputPFRecHits.pfrh_z.get(), diff --git a/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.h b/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.h index fa2ad7637b96b..787b47b135f5a 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.h +++ b/RecoParticleFlow/PFClusterProducer/plugins/PFClusterCudaHCAL.h @@ -5,14 +5,13 @@ #include "CUDADataFormats/PFRecHitSoA/interface/PFRecHitCollection.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h" + #include "CudaPFCommon.h" #include "DeclsForKernels.h" namespace PFClusterCudaHCAL { - void initializeCudaConstants(const PFClustering::common::CudaHCALConstants& cudaConstants, - const cudaStream_t cudaStream = cudaStreamDefault); - void PFRechitToPFCluster_HCALV2(size_t size, const float* __restrict__ pfrh_x, const float* __restrict__ pfrh_y, @@ -52,6 +51,7 @@ namespace PFClusterCudaHCAL { void PFRechitToPFCluster_HCAL_entryPoint( cudaStream_t cudaStream, + PFClusteringParamsGPU::DeviceProduct const& pfClusParams, int nEdges, ::hcal::PFRecHitCollection<::pf::common::DevStoragePolicy> const& inputPFRecHits, ::PFClustering::HCAL::OutputDataGPU& outputGPU, diff --git a/RecoParticleFlow/PFClusterProducer/plugins/PFClusterProducerCudaHCAL.cc b/RecoParticleFlow/PFClusterProducer/plugins/PFClusterProducerCudaHCAL.cc index 57b76ff952cb4..760fc09298cc4 100644 --- a/RecoParticleFlow/PFClusterProducer/plugins/PFClusterProducerCudaHCAL.cc +++ b/RecoParticleFlow/PFClusterProducer/plugins/PFClusterProducerCudaHCAL.cc @@ -19,6 +19,7 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/ESInputTag.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" @@ -34,6 +35,9 @@ #include "DeclsForKernels.h" #include "PFClusterCudaHCAL.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h" + class PFClusterProducerCudaHCAL : public edm::stream::EDProducer { public: PFClusterProducerCudaHCAL(const edm::ParameterSet&); @@ -58,6 +62,8 @@ class PFClusterProducerCudaHCAL : public edm::stream::EDProducer>> InputPFRecHitSoA_Token_; + edm::ESGetToken const pfClusParamsToken_; + bool initCuda_ = true; int nRH_ = 0; @@ -72,7 +78,6 @@ class PFClusterProducerCudaHCAL : public edm::stream::EDProducer("PFRecHitsLabelIn"))}, + pfClusParamsToken_{esConsumes(conf.getParameter("pfClusteringParameters"))}, _produceSoA{conf.getParameter("produceSoA")}, _produceLegacy{conf.getParameter("produceLegacy")}, _rechitsLabel{consumes(conf.getParameter("recHitsSource"))} { @@ -102,9 +108,6 @@ PFClusterProducerCudaHCAL::PFClusterProducerCudaHCAL(const edm::ParameterSet& co const edm::VParameterSet& seedFinderConfs = sfConf.getParameterSetVector("thresholdsByDetector"); - cudaConstants.minFracInCalc = 0.0; - cudaConstants.minAllowedNormalization = 0.0; - //setup topo cluster builder const edm::ParameterSet& initConf = conf.getParameterSet("initialClusteringStep"); const std::string& initName = initConf.getParameter("algoName"); @@ -125,8 +128,6 @@ PFClusterProducerCudaHCAL::PFClusterProducerCudaHCAL(const edm::ParameterSet& co const edm::ParameterSet& acConf = pfcConf.getParameterSet("positionCalc"); const std::string& algoac = acConf.getParameter("algoName"); _positionCalc = PFCPositionCalculatorFactory::get()->create(algoac, acConf, cc); - cudaConstants.minFracInCalc = (float)acConf.getParameter("minFractionInCalc"); - cudaConstants.minAllowedNormalization = (float)acConf.getParameter("minAllowedNormalization"); } if (pfcConf.exists("allCellsPositionCalc")) { @@ -148,103 +149,6 @@ PFClusterProducerCudaHCAL::PFClusterProducerCudaHCAL(const edm::ParameterSet& co _energyCorrector = PFClusterEnergyCorrectorFactory::get()->create(cName, cConf); } - cudaConstants.showerSigma2 = (float)std::pow(pfcConf.getParameter("showerSigma"), 2.); - const auto& recHitEnergyNormConf = pfcConf.getParameterSetVector("recHitEnergyNorms"); - for (const auto& pset : recHitEnergyNormConf) { - const std::string& det = pset.getParameter("detector"); - if (det == std::string("HCAL_BARREL1")) { - const auto& recHitENorms = pset.getParameter>("recHitEnergyNorm"); - std::copy(recHitENorms.begin(), recHitENorms.end(), cudaConstants.recHitEnergyNormInvEB_vec); - for (auto& x : cudaConstants.recHitEnergyNormInvEB_vec) - x = std::pow(x, -1); // Invert these values - } else if (det == std::string("HCAL_ENDCAP")) { - const auto& recHitENorms = pset.getParameter>("recHitEnergyNorm"); - std::copy(recHitENorms.begin(), recHitENorms.end(), cudaConstants.recHitEnergyNormInvEE_vec); - for (auto& x : cudaConstants.recHitEnergyNormInvEE_vec) - x = std::pow(x, -1); // Invert these values - } else - std::cout << "Unknown detector when parsing recHitEnergyNorm: " << det << std::endl; - } - //float recHitEnergyNormEB = 0.08; - //float recHitEnergyNormEE = 0.3; - //float minFracToKeep = 0.0000001; - cudaConstants.minFracToKeep = (float)pfcConf.getParameter("minFractionToKeep"); - cudaConstants.minFracTot = (float)pfcConf.getParameter("minFracTot"); - - // Max PFClustering iterations - cudaConstants.maxIterations = pfcConf.getParameter("maxIterations"); - - cudaConstants.excludeOtherSeeds = pfcConf.getParameter("excludeOtherSeeds"); - - cudaConstants.stoppingTolerance = (float)pfcConf.getParameter("stoppingTolerance"); - - cudaConstants.seedPt2ThresholdEB = -1; - cudaConstants.seedPt2ThresholdEE = -1; - for (const auto& pset : seedFinderConfs) { - const std::string& det = pset.getParameter("detector"); - if (det == std::string("HCAL_BARREL1")) { - const auto& thresholds = pset.getParameter>("seedingThreshold"); - std::copy(thresholds.begin(), thresholds.end(), cudaConstants.seedEThresholdEB_vec); - cudaConstants.seedPt2ThresholdEB = - (float)std::pow(pset.getParameter>("seedingThresholdPt")[0], 2.0); - - } else if (det == std::string("HCAL_ENDCAP")) { - const auto& thresholds = pset.getParameter>("seedingThreshold"); - std::copy(thresholds.begin(), thresholds.end(), cudaConstants.seedEThresholdEE_vec); - cudaConstants.seedPt2ThresholdEE = - (float)std::pow(pset.getParameter>("seedingThresholdPt")[0], 2.0); - } else - std::cout << "Unknown detector when parsing seedFinder: " << det << std::endl; - } - - const auto& topoThresholdConf = initConf.getParameterSetVector("thresholdsByDetector"); - for (const auto& pset : topoThresholdConf) { - const std::string& det = pset.getParameter("detector"); - if (det == std::string("HCAL_BARREL1")) { - const auto& thresholds = pset.getParameter>("gatheringThreshold"); - std::copy(thresholds.begin(), thresholds.end(), cudaConstants.topoEThresholdEB_vec); - } else if (det == std::string("HCAL_ENDCAP")) { - const auto& thresholds = pset.getParameter>("gatheringThreshold"); - std::copy(thresholds.begin(), thresholds.end(), cudaConstants.topoEThresholdEE_vec); - } else - std::cout << "Unknown detector when parsing initClusteringStep: " << det << std::endl; - } - - if (pfcConf.exists("timeResolutionCalcEndcap")) { - const edm::ParameterSet& endcapTimeResConf = pfcConf.getParameterSet("timeResolutionCalcEndcap"); - cudaConstants.endcapTimeResConsts.corrTermLowE = (float)endcapTimeResConf.getParameter("corrTermLowE"); - cudaConstants.endcapTimeResConsts.threshLowE = (float)endcapTimeResConf.getParameter("threshLowE"); - cudaConstants.endcapTimeResConsts.noiseTerm = (float)endcapTimeResConf.getParameter("noiseTerm"); - cudaConstants.endcapTimeResConsts.constantTermLowE2 = - (float)std::pow(endcapTimeResConf.getParameter("constantTermLowE"), 2.0); - cudaConstants.endcapTimeResConsts.noiseTermLowE = (float)endcapTimeResConf.getParameter("noiseTermLowE"); - cudaConstants.endcapTimeResConsts.threshHighE = (float)endcapTimeResConf.getParameter("threshHighE"); - cudaConstants.endcapTimeResConsts.constantTerm2 = - (float)std::pow(endcapTimeResConf.getParameter("constantTerm"), 2.0); - cudaConstants.endcapTimeResConsts.resHighE2 = - (float)std::pow(cudaConstants.endcapTimeResConsts.noiseTerm / cudaConstants.endcapTimeResConsts.threshHighE, - 2.0) + - cudaConstants.endcapTimeResConsts.constantTerm2; - } - - if (pfcConf.exists("timeResolutionCalcBarrel")) { - const edm::ParameterSet& barrelTimeResConf = pfcConf.getParameterSet("timeResolutionCalcBarrel"); - cudaConstants.barrelTimeResConsts.corrTermLowE = (float)barrelTimeResConf.getParameter("corrTermLowE"); - cudaConstants.barrelTimeResConsts.threshLowE = (float)barrelTimeResConf.getParameter("threshLowE"); - cudaConstants.barrelTimeResConsts.noiseTerm = (float)barrelTimeResConf.getParameter("noiseTerm"); - cudaConstants.barrelTimeResConsts.constantTermLowE2 = - (float)std::pow(barrelTimeResConf.getParameter("constantTermLowE"), 2.0); - cudaConstants.barrelTimeResConsts.noiseTermLowE = (float)barrelTimeResConf.getParameter("noiseTermLowE"); - cudaConstants.barrelTimeResConsts.threshHighE = (float)barrelTimeResConf.getParameter("threshHighE"); - cudaConstants.barrelTimeResConsts.constantTerm2 = - (float)std::pow(barrelTimeResConf.getParameter("constantTerm"), 2.0); - cudaConstants.barrelTimeResConsts.resHighE2 = - (float)std::pow(cudaConstants.barrelTimeResConsts.noiseTerm / cudaConstants.barrelTimeResConsts.threshHighE, - 2.0) + - cudaConstants.barrelTimeResConsts.constantTerm2; - } - cudaConstants.nNeigh = sfConf.getParameter("nNeighbours"); - produces(); } @@ -256,6 +160,9 @@ void PFClusterProducerCudaHCAL::fillDescriptions(edm::ConfigurationDescriptions& desc.add("PFRecHitsLabelIn", edm::InputTag("hltParticleFlowRecHitHBHE")); desc.add("produceSoA", true); desc.add("produceLegacy", true); + + desc.add("pfClusteringParameters", edm::ESInputTag("pfClusteringParamsGPUESSource", "pfClusParamsOffline")); + // Prevents the producer and navigator parameter sets from throwing an exception // TODO: Replace with a proper parameter set description: twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideConfigurationValidationAndHelp desc.setAllowAnything(); @@ -282,8 +189,6 @@ void PFClusterProducerCudaHCAL::acquire(edm::Event const& event, auto cudaStream = ctx.stream(); if (initCuda_) { - // Only allocate Cuda memory on first event - PFClusterCudaHCAL::initializeCudaConstants(cudaConstants, cudaStream); outputCPU.allocate(cudaConfig_, cudaStream); outputGPU.allocate(cudaConfig_, cudaStream); @@ -306,9 +211,11 @@ void PFClusterProducerCudaHCAL::acquire(edm::Event const& event, // if (cudaStreamQuery(cudaStream) != cudaSuccess) // cudaCheck(cudaStreamSynchronize(cudaStream)); + auto const& pfClusParamsProduct = setup.getData(pfClusParamsToken_).getProduct(cudaStream); + // Calling cuda kernels PFClusterCudaHCAL::PFRechitToPFCluster_HCAL_entryPoint( - cudaStream, totalNeighbours, PFRecHits, outputGPU, scratchGPU, kernelTimers); + cudaStream, pfClusParamsProduct, totalNeighbours, PFRecHits, outputGPU, scratchGPU, kernelTimers); if (!_produceLegacy) return; // do device->host transfer only when we are producing Legacy data diff --git a/RecoParticleFlow/PFClusterProducer/plugins/PFClusteringParamsGPUESSource.cc b/RecoParticleFlow/PFClusterProducer/plugins/PFClusteringParamsGPUESSource.cc new file mode 100644 index 0000000000000..fd0aa884a1635 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/plugins/PFClusteringParamsGPUESSource.cc @@ -0,0 +1,55 @@ +#include +#include +#include + +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/EventSetupRecordIntervalFinder.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h" + +class PFClusteringParamsGPUESSource : public edm::ESProducer, public edm::EventSetupRecordIntervalFinder { +public: + PFClusteringParamsGPUESSource(edm::ParameterSet const&); + ~PFClusteringParamsGPUESSource() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions&); + + std::unique_ptr produce(JobConfigurationGPURecord const&); + +protected: + void setIntervalFor(const edm::eventsetup::EventSetupRecordKey&, + const edm::IOVSyncValue&, + edm::ValidityInterval&) override; + +private: + edm::ParameterSet const& pset_; +}; + +PFClusteringParamsGPUESSource::PFClusteringParamsGPUESSource(edm::ParameterSet const& pset) : pset_{pset} { + // the fwk automatically uses "appendToDataLabel" as product label of the ESProduct + setWhatProduced(this); + findingRecord(); +} + +void PFClusteringParamsGPUESSource::setIntervalFor(const edm::eventsetup::EventSetupRecordKey& iKey, + const edm::IOVSyncValue& iTime, + edm::ValidityInterval& oInterval) { + oInterval = edm::ValidityInterval(edm::IOVSyncValue::beginOfTime(), edm::IOVSyncValue::endOfTime()); +} + +void PFClusteringParamsGPUESSource::fillDescriptions(edm::ConfigurationDescriptions& desc) { + edm::ParameterSetDescription psetDesc; + psetDesc.add("appendToDataLabel", "pfClusParamsOffline"); + PFClusteringParamsGPU::fillDescription(psetDesc); + desc.addWithDefaultLabel(psetDesc); +} + +std::unique_ptr PFClusteringParamsGPUESSource::produce(JobConfigurationGPURecord const&) { + return std::make_unique(pset_); +} + +#include "FWCore/Framework/interface/SourceFactory.h" +DEFINE_FWK_EVENTSETUP_SOURCE(PFClusteringParamsGPUESSource); diff --git a/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py b/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py index f9fe48869f53e..56f255cd248d7 100644 --- a/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py +++ b/RecoParticleFlow/PFClusterProducer/python/particleFlowCluster_cff.py @@ -87,18 +87,20 @@ inputECAL = 'particleFlowTimeAssignerECAL') #--- for Run 3 on GPU +from Configuration.ProcessModifiers.gpu_cff import gpu + from RecoParticleFlow.PFClusterProducer.pfhbheRecHitParamsGPUESProducer_cfi import pfhbheRecHitParamsGPUESProducer from RecoParticleFlow.PFClusterProducer.pfhbheTopologyGPUESProducer_cfi import pfhbheTopologyGPUESProducer -from Configuration.ProcessModifiers.gpu_cff import gpu -from Configuration.Eras.Modifier_run3_HB_cff import run3_HB +from RecoParticleFlow.PFClusterProducer.pfClusteringParamsGPUESSource_cfi import pfClusteringParamsGPUESSource _gpu_pfClusteringHBHEHFTask = pfClusteringHBHEHFTask.copy() -_gpu_pfClusteringHBHEHFOnlyTask = pfClusteringHBHEHFOnlyTask.copy() -_gpu_pfClusteringHBHEHFTask.add(cms.Task(pfhbheRecHitParamsGPUESProducer,pfhbheTopologyGPUESProducer)) -_gpu_pfClusteringHBHEHFOnlyTask.add(cms.Task(pfhbheRecHitParamsGPUESProducer,pfhbheTopologyGPUESProducer)) - -gpu.toReplaceWith(pfClusteringHBHEHFTask, - _gpu_pfClusteringHBHEHFTask) -gpu.toReplaceWith(pfClusteringHBHEHFOnlyTask, - _gpu_pfClusteringHBHEHFOnlyTask) +_gpu_pfClusteringHBHEHFTask.add(pfhbheRecHitParamsGPUESProducer) +_gpu_pfClusteringHBHEHFTask.add(pfhbheTopologyGPUESProducer) +_gpu_pfClusteringHBHEHFTask.add(pfClusteringParamsGPUESSource) +gpu.toReplaceWith(pfClusteringHBHEHFTask, _gpu_pfClusteringHBHEHFTask) +_gpu_pfClusteringHBHEHFOnlyTask = pfClusteringHBHEHFOnlyTask.copy() +_gpu_pfClusteringHBHEHFOnlyTask.add(pfhbheRecHitParamsGPUESProducer) +_gpu_pfClusteringHBHEHFOnlyTask.add(pfhbheTopologyGPUESProducer) +_gpu_pfClusteringHBHEHFOnlyTask.add(pfClusteringParamsGPUESSource) +gpu.toReplaceWith(pfClusteringHBHEHFOnlyTask, _gpu_pfClusteringHBHEHFOnlyTask) diff --git a/RecoParticleFlow/PFClusterProducer/src/PFClusteringParamsGPU.cc b/RecoParticleFlow/PFClusterProducer/src/PFClusteringParamsGPU.cc new file mode 100644 index 0000000000000..728c9fa74dbf5 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/src/PFClusteringParamsGPU.cc @@ -0,0 +1,226 @@ +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h" + +#include + +PFClusteringParamsGPU::PFClusteringParamsGPU(edm::ParameterSet const& iConfig) + : params_(7) { setParameterValues(iConfig); } + +PFClusteringParamsGPU::DeviceProduct const& PFClusteringParamsGPU::getProduct(cudaStream_t cudaStream) const { + auto const& ret = product_.dataForCurrentDeviceAsync( + cudaStream, [this](DeviceProduct& product, cudaStream_t cudaStream) { + product = DeviceProduct(params_->metadata().size(), cudaStream); + cudaCheck(cudaMemcpyAsync(product.buffer().get(), + params_.const_buffer().get(), + params_.bufferSize(), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return ret; +} + +void PFClusteringParamsGPU::setParameterValues(edm::ParameterSet const& iConfig) { + auto view = params_.view(); + + // seedFinder + auto const& sfConf = iConfig.getParameterSet("seedFinder"); + view.nNeigh() = sfConf.getParameter("nNeighbours"); + auto const& seedFinderConfs = sfConf.getParameterSetVector("thresholdsByDetector"); + for (auto const& pset : seedFinderConfs) { + auto const& det = pset.getParameter("detector"); + auto seedPt2Threshold = std::pow(pset.getParameter("seedingThresholdPt"), 2.); + auto const& thresholds = pset.getParameter>("seedingThreshold"); + if (det == "HCAL_BARREL1") { + if (thresholds.size() != kMaxDepth_barrel) + throw cms::Exception("InvalidConfiguration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_barrel << ") for \"\" vector of det = \"" << det << "\""; + view.seedPt2ThresholdEB() = seedPt2Threshold; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.seedEThresholdEB_vec()[idx] = thresholds[idx]; + } + } else if (det == "HCAL_ENDCAP") { + if (thresholds.size() != kMaxDepth_endcap) + throw cms::Exception("InvalidConfiguration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_endcap << ") for \"\" vector of det = \"" << det << "\""; + view.seedPt2ThresholdEE() = seedPt2Threshold; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.seedEThresholdEE_vec()[idx] = thresholds[idx]; + } + } else { + throw cms::Exception("InvalidConfiguration") << "Unknown detector when parsing seedFinder: " << det; + } + } + + // initialClusteringStep + auto const& initConf = iConfig.getParameterSet("initialClusteringStep"); + auto const& topoThresholdConf = initConf.getParameterSetVector("thresholdsByDetector"); + for (auto const& pset : topoThresholdConf) { + auto const& det = pset.getParameter("detector"); + auto const& thresholds = pset.getParameter>("gatheringThreshold"); + if (det == "HCAL_BARREL1") { + if (thresholds.size() != kMaxDepth_barrel) + throw cms::Exception("InvalidConfiguration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_barrel << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.topoEThresholdEB_vec()[idx] = thresholds[idx]; + } + } else if (det == "HCAL_ENDCAP") { + if (thresholds.size() != kMaxDepth_endcap) + throw cms::Exception("InvalidConfiguration") << "Invalid size (" << thresholds.size() << " != " << kMaxDepth_endcap << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < thresholds.size(); ++idx) { + view.topoEThresholdEE_vec()[idx] = thresholds[idx]; + } + } else { + throw cms::Exception("InvalidConfiguration") << "Unknown detector when parsing initClusteringStep: " << det; + } + } + + // pfClusterBuilder + auto const & pfClusterPSet = iConfig.getParameterSet("pfClusterBuilder"); + view.showerSigma2() = std::pow(pfClusterPSet.getParameter("showerSigma"), 2.); + view.minFracToKeep() = pfClusterPSet.getParameter("minFractionToKeep"); + view.minFracTot() = pfClusterPSet.getParameter("minFracTot"); + view.maxIterations() = pfClusterPSet.getParameter("maxIterations"); + view.excludeOtherSeeds() = pfClusterPSet.getParameter("excludeOtherSeeds"); + view.stoppingTolerance() = pfClusterPSet.getParameter("stoppingTolerance"); + auto const& pcPSet = pfClusterPSet.getParameterSet("positionCalc"); + view.minFracInCalc() = pcPSet.getParameter("minFractionInCalc"); + view.minAllowedNormalization() = pcPSet.getParameter("minAllowedNormalization"); + + auto const& recHitEnergyNormConf = pfClusterPSet.getParameterSetVector("recHitEnergyNorms"); + for (auto const& pset : recHitEnergyNormConf) { + auto const& recHitNorms = pset.getParameter>("recHitEnergyNorm"); + auto const& det = pset.getParameter("detector"); + if (det == "HCAL_BARREL1") { + if (recHitNorms.size() != kMaxDepth_barrel) + throw cms::Exception("InvalidConfiguration") << "Invalid size (" << recHitNorms.size() << " != " << kMaxDepth_barrel << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < recHitNorms.size(); ++idx) { + view.recHitEnergyNormInvEB_vec()[idx] = 1. / recHitNorms[idx]; + } + } else if (det == "HCAL_ENDCAP") { + if (recHitNorms.size() != kMaxDepth_endcap) + throw cms::Exception("InvalidConfiguration") << "Invalid size (" << recHitNorms.size() << " != " << kMaxDepth_endcap << ") for \"\" vector of det = \"" << det << "\""; + for (size_t idx = 0; idx < recHitNorms.size(); ++idx) { + view.recHitEnergyNormInvEE_vec()[idx] = 1. / recHitNorms[idx]; + } + } else { + throw cms::Exception("InvalidConfiguration") << "Unknown detector when parsing recHitEnergyNorms: " << det; + } + } + + auto const& barrelTimeResConf = pfClusterPSet.getParameterSet("timeResolutionCalcBarrel"); + view.barrelTimeResConsts_corrTermLowE() = barrelTimeResConf.getParameter("corrTermLowE"); + view.barrelTimeResConsts_threshLowE() = barrelTimeResConf.getParameter("threshLowE"); + view.barrelTimeResConsts_noiseTerm() = barrelTimeResConf.getParameter("noiseTerm"); + view.barrelTimeResConsts_constantTermLowE2() = std::pow(barrelTimeResConf.getParameter("constantTermLowE"), 2.); + view.barrelTimeResConsts_noiseTermLowE() = barrelTimeResConf.getParameter("noiseTermLowE"); + view.barrelTimeResConsts_threshHighE() = barrelTimeResConf.getParameter("threshHighE"); + view.barrelTimeResConsts_constantTerm2() = std::pow(barrelTimeResConf.getParameter("constantTerm"), 2.); + view.barrelTimeResConsts_resHighE2() = std::pow(view.barrelTimeResConsts_noiseTerm() / view.barrelTimeResConsts_threshHighE(), 2.) + + view.barrelTimeResConsts_constantTerm2(); + + auto const& endcapTimeResConf = pfClusterPSet.getParameterSet("timeResolutionCalcEndcap"); + view.endcapTimeResConsts_corrTermLowE() = endcapTimeResConf.getParameter("corrTermLowE"); + view.endcapTimeResConsts_threshLowE() = endcapTimeResConf.getParameter("threshLowE"); + view.endcapTimeResConsts_noiseTerm() = endcapTimeResConf.getParameter("noiseTerm"); + view.endcapTimeResConsts_constantTermLowE2() = std::pow(endcapTimeResConf.getParameter("constantTermLowE"), 2.); + view.endcapTimeResConsts_noiseTermLowE() = endcapTimeResConf.getParameter("noiseTermLowE"); + view.endcapTimeResConsts_threshHighE() = endcapTimeResConf.getParameter("threshHighE"); + view.endcapTimeResConsts_constantTerm2() = std::pow(endcapTimeResConf.getParameter("constantTerm"), 2.); + view.endcapTimeResConsts_resHighE2() = std::pow(view.endcapTimeResConsts_noiseTerm() / view.endcapTimeResConsts_threshHighE(), 2.) + + view.endcapTimeResConsts_constantTerm2(); +} + +void PFClusteringParamsGPU::fillDescription(edm::ParameterSetDescription& desc) { + { + auto const psetName = "seedFinder"; + edm::ParameterSetDescription foo; + foo.add("nNeighbours", 4); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("seedingThreshold", {}); + validator.add("seedingThresholdPt", 0.); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("seedingThreshold", {0.125, 0.25, 0.35, 0.35}); + vDefaults[0].addParameter("seedingThresholdPt", 0.); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("seedingThreshold", {0.1375, 0.275, 0.275, 0.275, 0.275, 0.275, 0.275}); + vDefaults[1].addParameter("seedingThresholdPt", 0.); + foo.addVPSet("thresholdsByDetector", validator, vDefaults); + } + desc.add(psetName, foo); + } + { + auto const psetName = "initialClusteringStep"; + edm::ParameterSetDescription foo; + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("gatheringThreshold", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("gatheringThreshold", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("gatheringThreshold", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + foo.addVPSet("thresholdsByDetector", validator, vDefaults); + } + desc.add(psetName, foo); + } + { + auto const psetName = "pfClusterBuilder"; + edm::ParameterSetDescription foo; + foo.add("maxIterations", 50); + foo.add("minFracTot", 1e-20); + foo.add("minFractionToKeep", 1e-7); + foo.add("excludeOtherSeeds", true); + foo.add("showerSigma", 10.); + foo.add("stoppingTolerance", 1e-8); + { + edm::ParameterSetDescription validator; + validator.add("detector", ""); + validator.add>("recHitEnergyNorm", {}); + std::vector vDefaults(2); + vDefaults[0].addParameter("detector", "HCAL_BARREL1"); + vDefaults[0].addParameter>("recHitEnergyNorm", {0.1, 0.2, 0.3, 0.3}); + vDefaults[1].addParameter("detector", "HCAL_ENDCAP"); + vDefaults[1].addParameter>("recHitEnergyNorm", {0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}); + foo.addVPSet("recHitEnergyNorms", validator, vDefaults); + } + { + edm::ParameterSetDescription bar; + bar.add("minFractionInCalc", 1e-9); + bar.add("minAllowedNormalization", 1e-9); + foo.add("positionCalc", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("corrTermLowE", 0.); + bar.add("threshLowE", 6.); + bar.add("noiseTerm", 21.86); + bar.add("constantTermLowE", 4.24); + bar.add("noiseTermLowE", 8.); + bar.add("threshHighE", 15.); + bar.add("constantTerm", 2.82); + foo.add("timeResolutionCalcBarrel", bar); + } + { + edm::ParameterSetDescription bar; + bar.add("corrTermLowE", 0.); + bar.add("threshLowE", 6.); + bar.add("noiseTerm", 21.86); + bar.add("constantTermLowE", 4.24); + bar.add("noiseTermLowE", 8.); + bar.add("threshHighE", 15.); + bar.add("constantTerm", 2.82); + foo.add("timeResolutionCalcEndcap", bar); + } + desc.add(psetName, foo); + } +} + +#include "FWCore/Utilities/interface/typelookup.h" +TYPELOOKUP_DATA_REG(PFClusteringParamsGPU); diff --git a/RecoParticleFlow/PFClusterProducer/test/BuildFile.xml b/RecoParticleFlow/PFClusterProducer/test/BuildFile.xml index 1b74ac95dbebb..3b7e65f35881e 100644 --- a/RecoParticleFlow/PFClusterProducer/test/BuildFile.xml +++ b/RecoParticleFlow/PFClusterProducer/test/BuildFile.xml @@ -17,3 +17,19 @@ + + + + + + + + + + + + + + + + diff --git a/RecoParticleFlow/PFClusterProducer/test/TestDumpPFClusteringParamsGPU.cc b/RecoParticleFlow/PFClusterProducer/test/TestDumpPFClusteringParamsGPU.cc new file mode 100644 index 0000000000000..2769464a615cd --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/test/TestDumpPFClusteringParamsGPU.cc @@ -0,0 +1,53 @@ +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/Utilities/interface/ESInputTag.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h" + +#include "testKernels.h" + +class TestDumpPFClusteringParamsGPU : public edm::stream::EDProducer { +public: + explicit TestDumpPFClusteringParamsGPU(edm::ParameterSet const&); + ~TestDumpPFClusteringParamsGPU() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; + void produce(edm::Event&, edm::EventSetup const&) override; + + edm::ESGetToken const pfClusParamsToken_; + + cms::cuda::ContextState cudaState_; +}; + +TestDumpPFClusteringParamsGPU::TestDumpPFClusteringParamsGPU(edm::ParameterSet const& iConfig) + : pfClusParamsToken_{esConsumes(iConfig.getParameter("pfClusteringParameters"))} { +} + +void TestDumpPFClusteringParamsGPU::fillDescriptions(edm::ConfigurationDescriptions& desc) { + edm::ParameterSetDescription psetDesc; + psetDesc.add("pfClusteringParameters", edm::ESInputTag("pfClusteringParamsGPUESSource", "pfClusParamsOffline")); + desc.addWithDefaultLabel(psetDesc); +} + +void TestDumpPFClusteringParamsGPU::acquire(edm::Event const& event, + edm::EventSetup const& setup, + edm::WaitingTaskWithArenaHolder holder) { + cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_}; + auto const& pfClusParamsProduct = setup.getData(pfClusParamsToken_).getProduct(ctx.stream()); + testPFlow::testPFClusteringParamsEntryPoint(pfClusParamsProduct, ctx.stream()); +} + +void TestDumpPFClusteringParamsGPU::produce(edm::Event& event, edm::EventSetup const& setup) { + cms::cuda::ScopedContextProduce ctx{cudaState_}; +} + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(TestDumpPFClusteringParamsGPU); diff --git a/RecoParticleFlow/PFClusterProducer/test/testKernels.cu b/RecoParticleFlow/PFClusterProducer/test/testKernels.cu new file mode 100644 index 0000000000000..adc66f9c4b063 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/test/testKernels.cu @@ -0,0 +1,69 @@ +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "testKernels.h" + +namespace testPFlowCUDA { + + __global__ void printPFClusteringParamsOnGPU(PFClusteringParamsGPU::DeviceProduct::ConstView params){ + + printf("nNeigh: %d\n", params.nNeigh()); + printf("seedPt2ThresholdEB: %4.2f\n", params.seedPt2ThresholdEB()); + printf("seedPt2ThresholdEE: %4.2f\n", params.seedPt2ThresholdEE()); + + for (uint32_t idx = 0; idx < 4; ++idx) + printf("seedEThresholdEB_vec[%d]: %4.2f\n", idx, params.seedEThresholdEB_vec()[idx]); + for (uint32_t idx = 0; idx < 7; ++idx) + printf("seedEThresholdEE_vec[%d]: %4.2f\n", idx, params.seedEThresholdEE_vec()[idx]); + + for (uint32_t idx = 0; idx < 4; ++idx) + printf("topoEThresholdEB_vec[%d]: %4.2f\n", idx, params.topoEThresholdEB_vec()[idx]); + for (uint32_t idx = 0; idx < 7; ++idx) + printf("topoEThresholdEE_vec[%d]: %4.2f\n", idx, params.topoEThresholdEE_vec()[idx]); + + printf("showerSigma2: %4.2f\n", params.showerSigma2()); + printf("minFracToKeep: %4.2f\n", params.minFracToKeep()); + printf("minFracTot: %4.2f\n", params.minFracTot()); + printf("maxIterations: %d\n", params.maxIterations()); + printf("excludeOtherSeeds: %d\n", params.excludeOtherSeeds()); + printf("stoppingTolerance: %4.2f\n", params.stoppingTolerance()); + printf("minFracInCalc: %4.2f\n", params.minFracInCalc()); + printf("minAllowedNormalization: %4.2f\n", params.minAllowedNormalization()); + + for (uint32_t idx = 0; idx < 4; ++idx) + printf("recHitEnergyNormInvEB_vec[%d]: %4.2f\n", idx, params.recHitEnergyNormInvEB_vec()[idx]); + for (uint32_t idx = 0; idx < 7; ++idx) + printf("recHitEnergyNormInvEE_vec[%d]: %4.2f\n", idx, params.recHitEnergyNormInvEE_vec()[idx]); + + printf("barrelTimeResConsts_corrTermLowE: %4.2f\n", params.barrelTimeResConsts_corrTermLowE()); + printf("barrelTimeResConsts_threshLowE: %4.2f\n", params.barrelTimeResConsts_threshLowE()); + printf("barrelTimeResConsts_noiseTerm: %4.2f\n", params.barrelTimeResConsts_noiseTerm()); + printf("barrelTimeResConsts_constantTermLowE2: %4.2f\n", params.barrelTimeResConsts_constantTermLowE2()); + printf("barrelTimeResConsts_noiseTermLowE: %4.2f\n", params.barrelTimeResConsts_noiseTermLowE()); + printf("barrelTimeResConsts_threshHighE: %4.2f\n", params.barrelTimeResConsts_threshHighE()); + printf("barrelTimeResConsts_constantTerm2: %4.2f\n", params.barrelTimeResConsts_constantTerm2()); + printf("barrelTimeResConsts_resHighE2: %4.2f\n", params.barrelTimeResConsts_resHighE2()); + + printf("endcapTimeResConsts_corrTermLowE: %4.2f\n", params.endcapTimeResConsts_corrTermLowE()); + printf("endcapTimeResConsts_threshLowE: %4.2f\n", params.endcapTimeResConsts_threshLowE()); + printf("endcapTimeResConsts_noiseTerm: %4.2f\n", params.endcapTimeResConsts_noiseTerm()); + printf("endcapTimeResConsts_constantTermLowE2: %4.2f\n", params.endcapTimeResConsts_constantTermLowE2()); + printf("endcapTimeResConsts_noiseTermLowE: %4.2f\n", params.endcapTimeResConsts_noiseTermLowE()); + printf("endcapTimeResConsts_threshHighE: %4.2f\n", params.endcapTimeResConsts_threshHighE()); + printf("endcapTimeResConsts_constantTerm2: %4.2f\n", params.endcapTimeResConsts_constantTerm2()); + printf("endcapTimeResConsts_resHighE2: %4.2f\n", params.endcapTimeResConsts_resHighE2()); + } + +} + +namespace testPFlow { + + void testPFClusteringParamsEntryPoint( + PFClusteringParamsGPU::DeviceProduct const& pfClusParams, + cudaStream_t cudaStream + ) { + testPFlowCUDA::printPFClusteringParamsOnGPU<<<1, 1, 0, cudaStream>>>(pfClusParams.const_view()); + } + +} diff --git a/RecoParticleFlow/PFClusterProducer/test/testKernels.h b/RecoParticleFlow/PFClusterProducer/test/testKernels.h new file mode 100644 index 0000000000000..548f8f2850834 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/test/testKernels.h @@ -0,0 +1,15 @@ +#ifndef RecoParticleFlow_PFClusterProducer_test_testKernels_h +#define RecoParticleFlow_PFClusterProducer_test_testKernels_h + +#include "RecoParticleFlow/PFClusterProducer/interface/PFClusteringParamsGPU.h" + +namespace testPFlow { + + void testPFClusteringParamsEntryPoint( + PFClusteringParamsGPU::DeviceProduct const& pfClusParams, + cudaStream_t cudaStream + ); + +} + +#endif // RecoParticleFlow_PFClusterProducer_test_testKernels_h diff --git a/RecoParticleFlow/PFClusterProducer/test/testPFClusParamsGPU_cfg.py b/RecoParticleFlow/PFClusterProducer/test/testPFClusParamsGPU_cfg.py new file mode 100644 index 0000000000000..827b67b05a714 --- /dev/null +++ b/RecoParticleFlow/PFClusterProducer/test/testPFClusParamsGPU_cfg.py @@ -0,0 +1,37 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('TEST') + +### +### EDM Input and job configuration +### +process.source = cms.Source('EmptySource') + +# limit the number of events to be processed +process.maxEvents.input = 50 + +# enable TrigReport, TimeReport and MultiThreading +process.options.wantSummary = True +process.options.numberOfThreads = 4 +process.options.numberOfStreams = 0 + +### +### ESModules, EDModules, Sequences, Tasks, Paths, EndPaths and Schedule +### +process.load('Configuration.StandardSequences.Accelerators_cff') + +from RecoParticleFlow.PFClusterProducer.pfClusteringParamsGPUESSource_cfi import pfClusteringParamsGPUESSource as _pfClusteringParamsGPUESSource +process.PFClusteringParamsGPUESSource = _pfClusteringParamsGPUESSource.clone( + appendToDataLabel = 'pfClusParamsOfflineDefault', +) + +from RecoParticleFlow.PFClusterProducer.testDumpPFClusteringParamsGPU_cfi import testDumpPFClusteringParamsGPU as _testDumpPFClusteringParamsGPU +process.theProducer = _testDumpPFClusteringParamsGPU.clone( + pfClusteringParameters = 'PFClusteringParamsGPUESSource:pfClusParamsOfflineDefault', +) + +process.theSequence = cms.Sequence( process.theProducer ) + +process.thePath = cms.Path( process.theSequence ) + +process.schedule = cms.Schedule( process.thePath )