From 94df11b62e4b17d84c2e8efb6b934bddeb069534 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Wed, 28 Aug 2024 17:16:08 -0700 Subject: [PATCH 1/9] migrate TrackCandidate to SoA from DataFormats/SoATemplate: kernels internally are still using the POD TrackCandidates SoA (to be migrated later) --- RecoTracker/LSTCore/interface/Constants.h | 25 ++++ .../interface/TrackCandidatesHostCollection.h | 10 ++ .../LSTCore/interface/TrackCandidatesSoA.h | 39 ++++++ .../LSTCore/interface/alpaka/Constants.h | 17 --- RecoTracker/LSTCore/interface/alpaka/LST.h | 4 - RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 131 +++++++++--------- RecoTracker/LSTCore/src/alpaka/Event.h | 9 +- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 83 +++++------ .../LSTCore/src/alpaka/TrackCandidate.h | 96 +++---------- RecoTracker/LSTCore/standalone/bin/lst.cc | 4 +- .../standalone/code/core/AccessHelper.cc | 16 +-- .../standalone/code/core/write_lst_ntuple.cc | 55 ++------ .../standalone/code/core/write_lst_ntuple.h | 3 - 13 files changed, 238 insertions(+), 254 deletions(-) create mode 100644 RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/TrackCandidatesSoA.h diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h index 350857ac0b2e5..00a2c83a1ce29 100644 --- a/RecoTracker/LSTCore/interface/Constants.h +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -2,6 +2,15 @@ #define RecoTracker_LSTCore_interface_Constants_h #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Common/interface/StdArray.h" + +#if defined(FP16_Base) +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED +#include +#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include +#endif +#endif #ifdef CACHE_ALLOC #include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h" @@ -55,6 +64,17 @@ namespace lst { constexpr unsigned int size_superbins = 45000; +// Half precision wrapper functions. +#if defined(FP16_Base) +#define __F2H __float2half +#define __H2F __half2float + typedef __half float FPX; +#else +#define __F2H +#define __H2F + typedef float FPX; +#endif + // Defining the constant host device variables right up here // Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future. struct Params_pLS { @@ -74,8 +94,13 @@ namespace lst { }; struct Params_pT5 { static constexpr int kLayers = 7, kHits = 14; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; }; + using ArrayUx2 = edm::StdArray; + } //namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h new file mode 100644 index 0000000000000..3ffd2bedf945e --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TrackCandidatesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h new file mode 100644 index 0000000000000..f40c30323abc1 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -0,0 +1,39 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesSoA_h +#define RecoTracker_LSTCore_interface_TrackCandidatesSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout, + SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS + SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers + SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // + SOA_COLUMN(int, pixelSeedIndex), // + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // + SOA_COLUMN(FPX, centerX), // + SOA_COLUMN(FPX, centerY), // + SOA_COLUMN(FPX, radius), // + SOA_SCALAR(unsigned int, nTrackCandidates), // + SOA_SCALAR(unsigned int, nTrackCandidatespT3), // + SOA_SCALAR(unsigned int, nTrackCandidatespT5), // + SOA_SCALAR(unsigned int, nTrackCandidatespLS), // + SOA_SCALAR(unsigned int, nTrackCandidatesT5)) // + + using TrackCandidatesSoA = TrackCandidatesSoALayout<>; + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void initScalars(TrackCandidatesSoA::View& v) { + v.nTrackCandidates() = 0; + v.nTrackCandidatesT5() = 0; + v.nTrackCandidatespT3() = 0; + v.nTrackCandidatespT5() = 0; + v.nTrackCandidatespLS() = 0; + } + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 1a16dad68420e..4477c5232608b 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -3,27 +3,10 @@ #include "RecoTracker/LSTCore/interface/Constants.h" -#if defined ALPAKA_ACC_GPU_CUDA_ENABLED -#include -#elif defined ALPAKA_ACC_GPU_HIP_ENABLED -#include -#endif - namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { using namespace ::lst; -// Half precision wrapper functions. -#if defined(FP16_Base) -#define __F2H __float2half -#define __H2F __half2float - typedef __half float FPX; -#else -#define __F2H -#define __H2F - typedef float FPX; -#endif - Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); // Needed for files that are compiled by g++ to not throw an error. diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h index 1f3c08804540f..df1319462432e 100644 --- a/RecoTracker/LSTCore/interface/alpaka/LST.h +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -66,10 +66,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::vector const& ph2_z); void getOutput(Event& event); - std::vector getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices); // Input and output vectors std::vector in_trkX_; diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 659591b836ec9..4991210729da3 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -64,7 +64,7 @@ void Event::resetEventSync() { quintupletsInGPU_.reset(); quintupletsBuffers_.reset(); trackCandidatesInGPU_.reset(); - trackCandidatesBuffers_.reset(); + trackCandidatesDC_.reset(); pixelTripletsInGPU_.reset(); pixelTripletsBuffers_.reset(); pixelQuintupletsInGPU_.reset(); @@ -78,7 +78,7 @@ void Event::resetEventSync() { quintupletsInCPU_.reset(); pixelTripletsInCPU_.reset(); pixelQuintupletsInCPU_.reset(); - trackCandidatesInCPU_.reset(); + trackCandidatesHC_.reset(); modulesInCPU_.reset(); } @@ -478,8 +478,10 @@ void Event::createTriplets() { void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { if (!trackCandidatesInGPU_) { trackCandidatesInGPU_.emplace(); - trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); - trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue, buf, 0u); + trackCandidatesInGPU_->setData(trackCandidatesDC_->view()); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; @@ -595,10 +597,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { auto nTrackCanpT3Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nTrackCanpT5Host_buf, trackCandidatesBuffers_->nTrackCandidatespT5_buf); - alpaka::memcpy(queue_, nTrackCanpT3Host_buf, trackCandidatesBuffers_->nTrackCandidatespT3_buf); - alpaka::memcpy(queue_, nTrackCanpLSHost_buf, trackCandidatesBuffers_->nTrackCandidatespLS_buf); - alpaka::memcpy(queue_, nTrackCanT5Host_buf, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT5, 1u)); + alpaka::memcpy( + queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT3, 1u)); + alpaka::memcpy( + queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespLS, 1u)); + alpaka::memcpy(queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u)); alpaka::wait(queue_); // wait to get the values before using them auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); @@ -821,8 +826,10 @@ void Event::createPixelQuintuplets() { } if (!trackCandidatesInGPU_) { trackCandidatesInGPU_.emplace(); - trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); - trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue, buf, 0u); + trackCandidatesInGPU_->setData(trackCandidatesDC_->view()); } auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); @@ -1209,7 +1216,7 @@ unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + alpaka::memcpy(queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); alpaka::wait(queue_); return *nTrackCandidates_buf_h.data(); @@ -1218,7 +1225,8 @@ int Event::getNumberOfTrackCandidates() { int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers_->nTrackCandidatespT5_buf); + alpaka::memcpy( + queue_, nTrackCandidatesPT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT5, 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT5_buf_h.data(); @@ -1227,7 +1235,8 @@ int Event::getNumberOfPT5TrackCandidates() { int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers_->nTrackCandidatespT3_buf); + alpaka::memcpy( + queue_, nTrackCandidatesPT3_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT3, 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT3_buf_h.data(); @@ -1236,7 +1245,8 @@ int Event::getNumberOfPT3TrackCandidates() { int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers_->nTrackCandidatespLS_buf); + alpaka::memcpy( + queue_, nTrackCandidatesPLS_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespLS, 1u)); alpaka::wait(queue_); return *nTrackCandidatesPLS_buf_h.data(); @@ -1246,8 +1256,9 @@ int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy(queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u)); alpaka::wait(queue_); return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); @@ -1256,7 +1267,8 @@ int Event::getNumberOfPixelTrackCandidates() { int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u)); alpaka::wait(queue_); return *nTrackCandidatesT5_buf_h.data(); @@ -1540,74 +1552,69 @@ PixelQuintupletsBuffer& Event::getPixelQuintuplets(bool return pixelQuintupletsInCPU_.value(); } -TrackCandidatesBuffer& Event::getTrackCandidates(bool sync) { - if (!trackCandidatesInCPU_) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ +const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - trackCandidatesInCPU_.emplace( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); - trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); - alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU_ becomes usable + alpaka::memcpy(queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); + trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue_, - trackCandidatesInCPU_->hitIndices_buf, - trackCandidatesBuffers_->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost; alpaka::memcpy( - queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc, trackCandidatesInGPU_->hitIndices, Params_pT5::kHits * nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->logicalLayers_buf, - trackCandidatesBuffers_->logicalLayers_buf, - Params_pT5::kLayers * nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->pixelSeedIndex, nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->logicalLayers, Params_pT5::kLayers * nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->directObjectIndices_buf, - trackCandidatesBuffers_->directObjectIndices_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().directObjectIndices(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->directObjectIndices, nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->objectIndices_buf, - trackCandidatesBuffers_->objectIndices_buf, - 2 * nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().objectIndices()->data(), 2 * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->objectIndices, 2 * nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->trackCandidateType_buf, - trackCandidatesBuffers_->trackCandidateType_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->trackCandidateType, nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU_.value(); + return trackCandidatesHC_.value(); } -TrackCandidatesBuffer& Event::getTrackCandidatesInCMSSW(bool sync) { - if (!trackCandidatesInCPU_) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ +const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - trackCandidatesInCPU_.emplace( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); - trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); - alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU_ becomes usable + alpaka::memcpy(queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); + trackCandidatesHC_ = + new ::lst::TrackCandidatesHostCollection(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue_, - trackCandidatesInCPU_->hitIndices_buf, - trackCandidatesBuffers_->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost; alpaka::memcpy( - queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->hitIndices, Params_pT5::kHits * nTrackCanHost)); + alpaka::memcpy(queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->pixelSeedIndex, nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->trackCandidateType_buf, - trackCandidatesBuffers_->trackCandidateType_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesInGPU_->trackCandidateType, nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU_.value(); + return trackCandidatesHC_.value(); } ModulesBuffer& Event::getModules(bool isFull, bool sync) { diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 2b09565cf4176..8108040073af2 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -3,6 +3,7 @@ #include +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "RecoTracker/LSTCore/interface/Module.h" @@ -55,7 +56,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional quintupletsInGPU_; std::optional> quintupletsBuffers_; std::optional trackCandidatesInGPU_; - std::optional> trackCandidatesBuffers_; + std::optional trackCandidatesDC_; std::optional pixelTripletsInGPU_; std::optional> pixelTripletsBuffers_; std::optional pixelQuintupletsInGPU_; @@ -67,7 +68,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional> mdsInCPU_; std::optional> segmentsInCPU_; std::optional> tripletsInCPU_; - std::optional> trackCandidatesInCPU_; + std::optional trackCandidatesHC_; std::optional> modulesInCPU_; std::optional> quintupletsInCPU_; std::optional> pixelTripletsInCPU_; @@ -189,8 +190,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { QuintupletsBuffer& getQuintuplets(bool sync = true); PixelTripletsBuffer& getPixelTriplets(bool sync = true); PixelQuintupletsBuffer& getPixelQuintuplets(bool sync = true); - TrackCandidatesBuffer& getTrackCandidates(bool sync = true); - TrackCandidatesBuffer& getTrackCandidatesInCMSSW(bool sync = true); + const TrackCandidatesHostCollection& getTrackCandidates(bool sync = true); + const TrackCandidatesHostCollection& getTrackCandidatesInCMSSW(bool sync = true); ModulesBuffer& getModules(bool isFull = false, bool sync = true); }; diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 65543720a1d34..028c95e9c8129 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -18,6 +18,42 @@ namespace { const float vy = dxy * p3.x() / pt - p3.y() / p * p3.z() / p * dz; return {vx, vy, vz}; } + + using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + std::vector getHitIdxs(short trackCandidateType, + Params_pT5::ArrayUxHits const& tcHitIndices, + unsigned int const* hitIndices) { + std::vector hits; + + unsigned int maxNHits = 0; + if (trackCandidateType == 7) + maxNHits = Params_pT5::kHits; // pT5 + else if (trackCandidateType == 5) + maxNHits = Params_pT3::kHits; // pT3 + else if (trackCandidateType == 4) + maxNHits = Params_T5::kHits; // T5 + else if (trackCandidateType == 8) + maxNHits = Params_pLS::kHits; // pLS + + for (unsigned int i = 0; i < maxNHits; i++) { + unsigned int hitIdxInGPU = tcHitIndices[i]; + unsigned int hitIdx = + (trackCandidateType == 8) + ? hitIdxInGPU + : hitIndices[hitIdxInGPU]; // Hit indices are stored differently in the standalone for pLS. + + // For p objects, the 3rd and 4th hit maybe the same, + // due to the way pLS hits are stored in the standalone. + // This is because pixel seeds can be either triplets or quadruplets. + if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. + continue; + + hits.push_back(hitIdx); + } + + return hits; + } + } // namespace void LST::prepareInput(std::vector const& see_px, @@ -212,60 +248,25 @@ void LST::prepareInput(std::vector const& see_px, in_isQuad_vec_ = isQuad_vec; } -std::vector LST::getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices) { - std::vector hits; - - unsigned int maxNHits = 0; - if (trackCandidateType == 7) - maxNHits = Params_pT5::kHits; // pT5 - else if (trackCandidateType == 5) - maxNHits = Params_pT3::kHits; // pT3 - else if (trackCandidateType == 4) - maxNHits = Params_T5::kHits; // T5 - else if (trackCandidateType == 8) - maxNHits = Params_pLS::kHits; // pLS - - for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; - unsigned int hitIdx = - (trackCandidateType == 8) - ? hitIdxInGPU - : hitIndices[hitIdxInGPU]; // Hit indices are stored differently in the standalone for pLS. - - // For p objects, the 3rd and 4th hit maybe the same, - // due to the way pLS hits are stored in the standalone. - // This is because pixel seeds can be either triplets or quadruplets. - if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. - continue; - - hits.push_back(hitIdx); - } - - return hits; -} - void LST::getOutput(Event& event) { std::vector> tc_hitIdxs; std::vector tc_len; std::vector tc_seedIdx; std::vector tc_trackCandidateType; - HitsBuffer& hitsInGPU = event.getHitsInCMSSW(false); // sync on next line - TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW().data(); + HitsBuffer& hitsBuffer = (*event.getHitsInCMSSW(false)); // sync on next line + auto const& trackCandidates = event.getTrackCandidatesInCMSSW()->const_view(); - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { - short trackCandidateType = trackCandidates->trackCandidateType[idx]; + short trackCandidateType = trackCandidates.trackCandidateType()[idx]; std::vector hit_idx = - getHitIdxs(trackCandidateType, idx, trackCandidates->hitIndices, hitsInGPU.data()->idxs); + getHitIdxs(trackCandidateType, trackCandidates.hitIndices()[idx], hitsBuffer.data()->idxs); tc_hitIdxs.push_back(hit_idx); tc_len.push_back(hit_idx.size()); - tc_seedIdx.push_back(trackCandidates->pixelSeedIndex[idx]); + tc_seedIdx.push_back(trackCandidates.pixelSeedIndex()[idx]); tc_trackCandidateType.push_back(trackCandidateType); } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 16f36df3257cd..5992124e2fd38 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -1,8 +1,11 @@ #ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h #define RecoTracker_LSTCore_src_alpaka_TrackCandidate_h +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" #include "Triplet.h" #include "Segment.h" @@ -13,6 +16,8 @@ #include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TrackCandidatesDeviceCollection = PortableCollection<::lst::TrackCandidatesSoA>; + struct TrackCandidates { short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS unsigned int* directObjectIndices; // Will hold direct indices to each type containers @@ -32,80 +37,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { FPX* centerY; FPX* radius; - template - void setData(TBuff& buf) { - trackCandidateType = buf.trackCandidateType_buf.data(); - directObjectIndices = buf.directObjectIndices_buf.data(); - objectIndices = buf.objectIndices_buf.data(); - nTrackCandidates = buf.nTrackCandidates_buf.data(); - nTrackCandidatespT3 = buf.nTrackCandidatespT3_buf.data(); - nTrackCandidatespT5 = buf.nTrackCandidatespT5_buf.data(); - nTrackCandidatespLS = buf.nTrackCandidatespLS_buf.data(); - nTrackCandidatesT5 = buf.nTrackCandidatesT5_buf.data(); - - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - pixelSeedIndex = buf.pixelSeedIndex_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - radius = buf.radius_buf.data(); - } - }; - - template - struct TrackCandidatesBuffer { - Buf trackCandidateType_buf; - Buf directObjectIndices_buf; - Buf objectIndices_buf; - Buf nTrackCandidates_buf; - Buf nTrackCandidatespT3_buf; - Buf nTrackCandidatespT5_buf; - Buf nTrackCandidatespLS_buf; - Buf nTrackCandidatesT5_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf pixelSeedIndex_buf; - Buf lowerModuleIndices_buf; - - Buf centerX_buf; - Buf centerY_buf; - Buf radius_buf; - - TrackCandidates data_; - - template - TrackCandidatesBuffer(unsigned int maxTrackCandidates, TDevAcc const& devAccIn, TQueue& queue) - : trackCandidateType_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - directObjectIndices_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - objectIndices_buf(allocBufWrapper(devAccIn, 2 * maxTrackCandidates, queue)), - nTrackCandidates_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespT3_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespT5_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespLS_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatesT5_buf(allocBufWrapper(devAccIn, 1, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kHits * maxTrackCandidates, queue)), - pixelSeedIndex_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - radius_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)) { - alpaka::memset(queue, nTrackCandidates_buf, 0u); - alpaka::memset(queue, nTrackCandidatesT5_buf, 0u); - alpaka::memset(queue, nTrackCandidatespT3_buf, 0u); - alpaka::memset(queue, nTrackCandidatespT5_buf, 0u); - alpaka::memset(queue, nTrackCandidatespLS_buf, 0u); - alpaka::memset(queue, logicalLayers_buf, 0u); - alpaka::memset(queue, lowerModuleIndices_buf, 0u); - alpaka::memset(queue, hitIndices_buf, 0u); - alpaka::memset(queue, pixelSeedIndex_buf, 0); + void setData(TrackCandidatesSoA::View& view) { + trackCandidateType = view.trackCandidateType(); + directObjectIndices = view.directObjectIndices(); + objectIndices = view.objectIndices()->data(); + nTrackCandidates = &view.nTrackCandidates(); + nTrackCandidatespT3 = &view.nTrackCandidatespT3(); + nTrackCandidatespT5 = &view.nTrackCandidatespT5(); + nTrackCandidatespLS = &view.nTrackCandidatespLS(); + nTrackCandidatesT5 = &view.nTrackCandidatesT5(); + + logicalLayers = view.logicalLayers()->data(); + hitIndices = view.hitIndices()->data(); + pixelSeedIndex = view.pixelSeedIndex(); + lowerModuleIndices = view.lowerModuleIndices()->data(); + + centerX = view.centerX(); + centerY = view.centerY(); + radius = view.radius(); } - - inline TrackCandidates const* data() const { return &data_; } - inline void setData(TrackCandidatesBuffer& buf) { data_.setData(buf); } }; ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index c0e52d0a0d194..ca8126c8ae4c4 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -460,7 +460,9 @@ void run_lst() { if (ana.verbose == 5) { #pragma omp critical - { debugPrintOutlierMultiplicities(events.at(omp_get_thread_num())); } + { + // TODO: debugPrintOutlierMultiplicities + } } if (ana.do_write_ntuple) { diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index 426a74babc4d1..d5edc045be4c7 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -410,11 +410,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(Event* event, unsigned int TC) { +std::vector getLSsFromTC(Event* event, unsigned int iTC) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[TC]; - unsigned int objidx = trackCandidates->directObjectIndices[TC]; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + short type = trackCandidates.trackCandidateType()[iTC]; + unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { case kpT5: return getLSsFrompT5(event, objidx); @@ -433,11 +433,11 @@ std::vector getLSsFromTC(Event* event, unsigned int TC) { //____________________________________________________________________________________________ std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(Event* event, - unsigned TC) { + unsigned iTC) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[TC]; - unsigned int objidx = trackCandidates->directObjectIndices[TC]; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + short type = trackCandidates.trackCandidateType()[iTC]; + unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { case kpT5: return getHitIdxsAndHitTypesFrompT5(event, objidx); diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index e12512f5c5c7d..cf71f9a42a1fe 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -226,8 +226,8 @@ void setOutputBranches(Event* event) { std::vector> tc_matched_simIdx; // ============ Track candidates ============= - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Compute reco quantities of track candidate based on final object int type, isFake; @@ -506,7 +506,7 @@ void setGnnNtupleBranches(Event* event) { Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); ObjectRanges const* ranges = event->getRanges().data(); - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); std::set mds_used_in_sg; std::map md_index_map; @@ -521,7 +521,7 @@ void setGnnNtupleBranches(Event* event) { } std::set lss_used_in_true_tc; - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Only consider true track candidates std::vector hitidxs; @@ -710,8 +710,8 @@ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { //________________________________________________________________________________________________________________________________ std::tuple> parseTrackCandidate(Event* event, unsigned int idx) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[idx]; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + short type = trackCandidates.trackCandidateType()[idx]; enum { pT5 = 7, pT3 = 5, T5 = 4, pLS = 8 }; @@ -744,7 +744,7 @@ std::tuple> parseTrackCandidate( std::tuple, std::vector> parsepT5(Event* event, unsigned int idx) { // Get relevant information - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Quintuplets const* quintuplets = event->getQuintuplets().data(); Segments const* segments = event->getSegments().data(); @@ -757,7 +757,7 @@ std::tuple, std::vectordirectObjectIndices[idx]; + unsigned int pT5 = trackCandidates.directObjectIndices()[idx]; unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5Index = getT5FrompT5(event, pT5); @@ -856,7 +856,7 @@ std::tuple, std::vector, std::vector> parsepT3(Event* event, unsigned int idx) { // Get relevant information - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Triplets const* triplets = event->getTriplets().data(); Segments const* segments = event->getSegments().data(); @@ -867,7 +867,7 @@ std::tuple, std::vectordirectObjectIndices[idx]; + unsigned int pT3 = trackCandidates.directObjectIndices()[idx]; unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); @@ -890,9 +890,9 @@ std::tuple, std::vector, std::vector> parseT5(Event* event, unsigned int idx) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Quintuplets const* quintuplets = event->getQuintuplets().data(); - unsigned int T5 = trackCandidates->directObjectIndices[idx]; + unsigned int T5 = trackCandidates.directObjectIndices()[idx]; std::vector hits = getHitsFromT5(event, T5); // @@ -924,11 +924,11 @@ std::tuple, std::vector, std::vector> parsepLS(Event* event, unsigned int idx) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Segments const* segments = event->getSegments().data(); // Getting pLS index - unsigned int pLS = trackCandidates->directObjectIndices[idx]; + unsigned int pLS = trackCandidates.directObjectIndices()[idx]; // Getting pt eta and phi float pt = segments->ptIn[pLS]; @@ -1109,30 +1109,3 @@ void printT3s(Event* event) { } std::cout << "VALIDATION nTriplets: " << nTriplets << std::endl; } - -//________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(Event* event) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); - MiniDoublets const* miniDoublets = event->getMiniDoublets().data(); - Modules const* modules = event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); - //int nTrackCandidates = 0; - for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { - if (trackCandidates->nTrackCandidates[idx] > 50000) { - std::cout << " modules->detIds[modules->lowerModuleIndices[idx]]: " << modules->detIds[idx] << std::endl; - std::cout << " idx: " << idx - << " trackCandidates->nTrackCandidates[idx]: " << trackCandidates->nTrackCandidates[idx] << std::endl; - std::cout << " idx: " << idx << " triplets->nTriplets[idx]: " << triplets->nTriplets[idx] << std::endl; - unsigned int i = idx; //modules->lowerModuleIndices[idx]; - std::cout << " idx: " << idx << " i: " << i << " segments->nSegments[i]: " << segments->nSegments[i] << std::endl; - int nMD = miniDoublets->nMDs[2 * idx] + miniDoublets->nMDs[2 * idx + 1]; - std::cout << " idx: " << idx << " nMD: " << nMD << std::endl; - int nHits = 0; - nHits += ranges->hitRanges[4 * idx + 1] - ranges->hitRanges[4 * idx] + 1; - nHits += ranges->hitRanges[4 * idx + 3] - ranges->hitRanges[4 * idx + 2] + 1; - std::cout << " idx: " << idx << " nHits: " << nHits << std::endl; - } - } -} diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h index 7a25c0d3cbcc6..3f04ec59ad554 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h @@ -52,7 +52,4 @@ void printT3s(LSTEvent* event); void printT4s(LSTEvent* event); void printTCs(LSTEvent* event); -// Print anomalous multiplicities -void debugPrintOutlierMultiplicities(LSTEvent* event); - #endif From 18a1f6e1b476d1a1af2dd61f9cf818247d1a064e Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Wed, 28 Aug 2024 17:18:14 -0700 Subject: [PATCH 2/9] drop -Wshadow in standalone builds: DataFormats/SoATemplate and related generates around 30MB of warnings with only partial TrackCandidate SoA use --- RecoTracker/LSTCore/standalone/LST/Makefile | 6 +++--- RecoTracker/LSTCore/standalone/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/RecoTracker/LSTCore/standalone/LST/Makefile b/RecoTracker/LSTCore/standalone/LST/Makefile index ba5e19e6a2779..5a282fedff6e2 100644 --- a/RecoTracker/LSTCore/standalone/LST/Makefile +++ b/RecoTracker/LSTCore/standalone/LST/Makefile @@ -44,9 +44,9 @@ LIBS=$(LIB_CPU) $(LIB_CUDA) $(LIB_ROCM) GENCODE_CUDA := -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_89,code=[sm_89,compute_89] CXX = g++ -CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Wshadow -Woverloaded-virtual -fPIC -fopenmp -I.. -CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Wshadow --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. -CXXFLAGS_ROCM = -O3 -g -Wall -Wshadow -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. +CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Woverloaded-virtual -fPIC -fopenmp -I.. +CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. +CXXFLAGS_ROCM = -O3 -g -Wall -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. CMSSWINCLUDE := -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src ifdef CMSSW_RELEASE_BASE CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src diff --git a/RecoTracker/LSTCore/standalone/Makefile b/RecoTracker/LSTCore/standalone/Makefile index efcd2483c5eba..c1ba326e2dcc5 100644 --- a/RecoTracker/LSTCore/standalone/Makefile +++ b/RecoTracker/LSTCore/standalone/Makefile @@ -9,7 +9,7 @@ OBJECTS_ROCM=$(SOURCES:.cc=_rocm.o) OBJECTS=$(OBJECTS_CPU) $(OBJECTS_CUDA) $(OBJECTS_ROCM) CXX = g++ -CXXFLAGS = -g -O2 -Wall -fPIC -Wshadow -Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++17 +CXXFLAGS = -g -O2 -Wall -fPIC -Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++17 INCLUDEFLAGS= -ILST -I$(shell pwd) -Icode -Icode/core -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include $(shell rooutil-config --include) -I$(shell root-config --incdir) -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src -I../interface/ -I../interface/alpaka/ -I../src/ -I../src/alpaka/ ifdef CMSSW_RELEASE_BASE INCLUDEFLAGS:= ${INCLUDEFLAGS} -I${CMSSW_RELEASE_BASE}/src From 3e98eb4e71fa9573ccff222ba0b9f18ebc44f809 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Wed, 28 Aug 2024 20:49:20 -0700 Subject: [PATCH 3/9] code checks --- .../LSTCore/interface/TrackCandidatesSoA.h | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h index f40c30323abc1..06b138b3f728f 100644 --- a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -9,21 +9,21 @@ namespace lst { GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout, - SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS - SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers - SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices - SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // - SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // - SOA_COLUMN(int, pixelSeedIndex), // - SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // - SOA_COLUMN(FPX, centerX), // - SOA_COLUMN(FPX, centerY), // - SOA_COLUMN(FPX, radius), // - SOA_SCALAR(unsigned int, nTrackCandidates), // - SOA_SCALAR(unsigned int, nTrackCandidatespT3), // - SOA_SCALAR(unsigned int, nTrackCandidatespT5), // - SOA_SCALAR(unsigned int, nTrackCandidatespLS), // - SOA_SCALAR(unsigned int, nTrackCandidatesT5)) // + SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS + SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers + SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // + SOA_COLUMN(int, pixelSeedIndex), // + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // + SOA_COLUMN(FPX, centerX), // + SOA_COLUMN(FPX, centerY), // + SOA_COLUMN(FPX, radius), // + SOA_SCALAR(unsigned int, nTrackCandidates), // + SOA_SCALAR(unsigned int, nTrackCandidatespT3), // + SOA_SCALAR(unsigned int, nTrackCandidatespT5), // + SOA_SCALAR(unsigned int, nTrackCandidatespLS), // + SOA_SCALAR(unsigned int, nTrackCandidatesT5)) // using TrackCandidatesSoA = TrackCandidatesSoALayout<>; @@ -34,6 +34,6 @@ namespace lst { v.nTrackCandidatespT5() = 0; v.nTrackCandidatespLS() = 0; } - + } // namespace lst #endif From a5822d0c626a43f7292c6000f2a547e5a77f9a3c Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 29 Aug 2024 07:41:06 -0700 Subject: [PATCH 4/9] add a check that device and host match for host=device --- RecoTracker/LSTCore/src/alpaka/TrackCandidate.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 5992124e2fd38..407a155f59f05 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -1,10 +1,12 @@ #ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h #define RecoTracker_LSTCore_src_alpaka_TrackCandidate_h + #include "DataFormats/Portable/interface/alpaka/PortableCollection.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" #include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" #include "Triplet.h" @@ -537,4 +539,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(lst::TrackCandidatesDeviceCollection, lst::TrackCandidatesHostCollection); + #endif From 8864fd13b58c8dc33c8ab6402d0f937679531990 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 29 Aug 2024 07:42:03 -0700 Subject: [PATCH 5/9] sync DALPAKA flags in standalone builds with cmsdist scram-tools.file/tools/alpaka/alpaka.xml --- RecoTracker/LSTCore/standalone/LST/Makefile | 6 +++--- RecoTracker/LSTCore/standalone/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/RecoTracker/LSTCore/standalone/LST/Makefile b/RecoTracker/LSTCore/standalone/LST/Makefile index 5a282fedff6e2..3dd0483edfeb6 100644 --- a/RecoTracker/LSTCore/standalone/LST/Makefile +++ b/RecoTracker/LSTCore/standalone/LST/Makefile @@ -52,9 +52,9 @@ ifdef CMSSW_RELEASE_BASE CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src endif ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++17 ${CMSSWINCLUDE} -ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY --expt-relaxed-constexpr -ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG +ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 --expt-relaxed-constexpr +ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 ROOTINCLUDE = -I$(ROOT_ROOT)/include ROOTCFLAGS = -pthread -m64 $(ROOTINCLUDE) PRINTFLAG = -DT4FromT3 diff --git a/RecoTracker/LSTCore/standalone/Makefile b/RecoTracker/LSTCore/standalone/Makefile index c1ba326e2dcc5..8eb677611513e 100644 --- a/RecoTracker/LSTCore/standalone/Makefile +++ b/RecoTracker/LSTCore/standalone/Makefile @@ -20,9 +20,9 @@ LDFLAGS_ROCM= -L${ROCM_ROOT}/lib -lamdhip64 ALPAKAFLAGS = -DALPAKA_DEBUG=0 CUDAINCLUDE = -I${CUDA_HOME}/include ROCMINCLUDE = -I${ROCM_ROOT}/include -ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -ALPAKA_CUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY -ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ +ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_CUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 EXTRAFLAGS = -ITMultiDrawTreePlayer -Wunused-variable -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -fopenmp DOQUINTUPLET = PTCUTFLAG = From af1f68ff3bfde38596b06fad16ff0e5bb4e71d20 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 29 Aug 2024 07:46:09 -0700 Subject: [PATCH 6/9] code checks --- RecoTracker/LSTCore/src/alpaka/TrackCandidate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 407a155f59f05..32b6b3bdee9fa 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -1,7 +1,6 @@ #ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h #define RecoTracker_LSTCore_src_alpaka_TrackCandidate_h - #include "DataFormats/Portable/interface/alpaka/PortableCollection.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" From 155fc82f071f792393fd1f75a28b9b391ac29dd1 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 29 Aug 2024 09:50:08 -0700 Subject: [PATCH 7/9] switch to TrackCandidates = ::lst::TrackCandidatesSoA::View --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 118 ++++++++------- RecoTracker/LSTCore/src/alpaka/Event.h | 2 +- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 4 +- .../LSTCore/src/alpaka/TrackCandidate.h | 138 +++++++----------- 4 files changed, 119 insertions(+), 143 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 4991210729da3..090f8608e1e02 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -63,7 +63,7 @@ void Event::resetEventSync() { tripletsBuffers_.reset(); quintupletsInGPU_.reset(); quintupletsBuffers_.reset(); - trackCandidatesInGPU_.reset(); + trackCandidatesD_ = nullptr; trackCandidatesDC_.reset(); pixelTripletsInGPU_.reset(); pixelTripletsBuffers_.reset(); @@ -476,12 +476,11 @@ void Event::createTriplets() { } void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { - if (!trackCandidatesInGPU_) { - trackCandidatesInGPU_.emplace(); + if (!trackCandidatesDC_) { trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); auto buf = trackCandidatesDC_->buffer(); - alpaka::memset(queue, buf, 0u); - trackCandidatesInGPU_->setData(trackCandidatesDC_->view()); + alpaka::memset(queue_, buf, 0u); + trackCandidatesD_ = &trackCandidatesDC_->view(); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; @@ -505,7 +504,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddpT3asTrackCandidatesInGPU{}, nLowerModules_, *pixelTripletsInGPU_, - *trackCandidatesInGPU_, + *trackCandidatesD_, *segmentsInGPU_, *rangesInGPU_); @@ -550,7 +549,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU_, - *trackCandidatesInGPU_, + *trackCandidatesD_, *rangesInGPU_); if (!no_pls_dupclean) { @@ -573,7 +572,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { *modulesBuffers_.data(), *rangesInGPU_, *pixelTripletsInGPU_, - *trackCandidatesInGPU_, + *trackCandidatesD_, *segmentsInGPU_, *mdsInGPU_, *hitsInGPU_, @@ -588,7 +587,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { addpLSasTrackCandidateInGPU_workDiv, AddpLSasTrackCandidateInGPU{}, nLowerModules_, - *trackCandidatesInGPU_, + *trackCandidatesD_, *segmentsInGPU_, tc_pls_triplets); @@ -598,12 +597,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); alpaka::memcpy( - queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT5, 1u)); + queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT5(), 1u)); alpaka::memcpy( - queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT3, 1u)); + queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT3(), 1u)); alpaka::memcpy( - queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespLS, 1u)); - alpaka::memcpy(queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u)); + queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespLS(), 1u)); + alpaka::memcpy( + queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); // wait to get the values before using them auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); @@ -824,12 +824,11 @@ void Event::createPixelQuintuplets() { pixelQuintupletsBuffers_.emplace(n_max_pixel_quintuplets, devAcc_, queue_); pixelQuintupletsInGPU_->setData(*pixelQuintupletsBuffers_); } - if (!trackCandidatesInGPU_) { - trackCandidatesInGPU_.emplace(); + if (!trackCandidatesDC_) { trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); auto buf = trackCandidatesDC_->buffer(); - alpaka::memset(queue, buf, 0u); - trackCandidatesInGPU_->setData(trackCandidatesDC_->view()); + alpaka::memset(queue_, buf, 0u); + trackCandidatesD_ = &trackCandidatesDC_->view(); } auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); @@ -938,7 +937,7 @@ void Event::createPixelQuintuplets() { AddpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU_, - *trackCandidatesInGPU_, + *trackCandidatesD_, *segmentsInGPU_, *rangesInGPU_); @@ -1216,7 +1215,8 @@ unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); + alpaka::memcpy( + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); alpaka::wait(queue_); return *nTrackCandidates_buf_h.data(); @@ -1226,7 +1226,7 @@ int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidatesPT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT5, 1u)); + queue_, nTrackCandidatesPT5_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT5_buf_h.data(); @@ -1236,7 +1236,7 @@ int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidatesPT3_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT3, 1u)); + queue_, nTrackCandidatesPT3_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT3(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT3_buf_h.data(); @@ -1246,7 +1246,7 @@ int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidatesPLS_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespLS, 1u)); + queue_, nTrackCandidatesPLS_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespLS(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPLS_buf_h.data(); @@ -1256,9 +1256,10 @@ int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); alpaka::memcpy( - queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u)); + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); @@ -1268,7 +1269,7 @@ int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u)); + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesT5_buf_h.data(); @@ -1556,7 +1557,8 @@ const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { if (!trackCandidatesHC_) { // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); + alpaka::memcpy( + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable @@ -1565,24 +1567,32 @@ const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost; alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), - alpaka::createView(devAcc, trackCandidatesInGPU_->hitIndices, Params_pT5::kHits * nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->pixelSeedIndex, nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), + trackCandidatesHC_->view().hitIndices()->data(), + Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->logicalLayers, Params_pT5::kLayers * nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().directObjectIndices(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->directObjectIndices, nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().objectIndices()->data(), 2 * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->objectIndices, 2 * nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->pixelSeedIndex(), nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), + trackCandidatesHC_->view().logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().directObjectIndices(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->directObjectIndices(), nTrackCanHost)); alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->trackCandidateType, nTrackCanHost)); + alpaka::createView( + cms::alpakatools::host(), trackCandidatesHC_->view().objectIndices()->data(), 2 * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->objectIndices()->data(), 2 * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } @@ -1593,9 +1603,9 @@ const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) if (!trackCandidatesHC_) { // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u)); - trackCandidatesHC_ = - new ::lst::TrackCandidatesHostCollection(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + alpaka::memcpy( + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); + trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); @@ -1603,14 +1613,18 @@ const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost; alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->hitIndices, Params_pT5::kHits * nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->pixelSeedIndex, nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesInGPU_->trackCandidateType, nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), + trackCandidatesHC_->view().hitIndices()->data(), + Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->pixelSeedIndex(), nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, trackCandidatesD_->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 8108040073af2..3ad1c79cd7ae7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -55,7 +55,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional> tripletsBuffers_; std::optional quintupletsInGPU_; std::optional> quintupletsBuffers_; - std::optional trackCandidatesInGPU_; + TrackCandidates* trackCandidatesD_; // not owned, view from trackCandidatesDC_ std::optional trackCandidatesDC_; std::optional pixelTripletsInGPU_; std::optional> pixelTripletsBuffers_; diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 028c95e9c8129..2c5cfd499c7e0 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -254,8 +254,8 @@ void LST::getOutput(Event& event) { std::vector tc_seedIdx; std::vector tc_trackCandidateType; - HitsBuffer& hitsBuffer = (*event.getHitsInCMSSW(false)); // sync on next line - auto const& trackCandidates = event.getTrackCandidatesInCMSSW()->const_view(); + HitsBuffer& hitsBuffer = event.getHitsInCMSSW(false); // sync on next line + auto const& trackCandidates = event.getTrackCandidatesInCMSSW().const_view(); unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 32b6b3bdee9fa..1622f746893c7 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -19,66 +19,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { using TrackCandidatesDeviceCollection = PortableCollection<::lst::TrackCandidatesSoA>; - struct TrackCandidates { - short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS - unsigned int* directObjectIndices; // Will hold direct indices to each type containers - unsigned int* objectIndices; // Will hold tracklet and triplet indices - check the type!! - unsigned int* nTrackCandidates; - unsigned int* nTrackCandidatespT3; - unsigned int* nTrackCandidatespT5; - unsigned int* nTrackCandidatespLS; - unsigned int* nTrackCandidatesT5; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - int* pixelSeedIndex; - uint16_t* lowerModuleIndices; - - FPX* centerX; - FPX* centerY; - FPX* radius; - - void setData(TrackCandidatesSoA::View& view) { - trackCandidateType = view.trackCandidateType(); - directObjectIndices = view.directObjectIndices(); - objectIndices = view.objectIndices()->data(); - nTrackCandidates = &view.nTrackCandidates(); - nTrackCandidatespT3 = &view.nTrackCandidatespT3(); - nTrackCandidatespT5 = &view.nTrackCandidatespT5(); - nTrackCandidatespLS = &view.nTrackCandidatespLS(); - nTrackCandidatesT5 = &view.nTrackCandidatesT5(); - - logicalLayers = view.logicalLayers()->data(); - hitIndices = view.hitIndices()->data(); - pixelSeedIndex = view.pixelSeedIndex(); - lowerModuleIndices = view.lowerModuleIndices()->data(); - - centerX = view.centerX(); - centerY = view.centerY(); - radius = view.radius(); - } - }; + using TrackCandidates = ::lst::TrackCandidatesSoA::View; + using TrackCandidatesConst = ::lst::TrackCandidatesSoA::ConstView; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& cands, unsigned int trackletIndex, unsigned int trackCandidateIndex, uint4 hitIndices, int pixelSeedIndex) { - trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = 8; // type for pLS - trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = trackletIndex; - trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + cands.trackCandidateType()[trackCandidateIndex] = 8; // type for pLS + cands.directObjectIndices()[trackCandidateIndex] = trackletIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = trackletIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][0] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = trackletIndex; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 0] = + cands.hitIndices()[trackCandidateIndex][0] = hitIndices.x; // Order explanation in https://github.com/SegmentLinking/TrackLooper/issues/267 - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 1] = hitIndices.z; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 2] = hitIndices.y; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; + cands.hitIndices()[trackCandidateIndex][1] = hitIndices.z; + cands.hitIndices()[trackCandidateIndex][2] = hitIndices.y; + cands.hitIndices()[trackCandidateIndex][3] = hitIndices.w; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& cands, short trackCandidateType, unsigned int innerTrackletIndex, unsigned int outerTrackletIndex, @@ -91,12 +54,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float radius, unsigned int trackCandidateIndex, unsigned int directObjectIndex) { - trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = trackCandidateType; - trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = directObjectIndex; - trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + cands.trackCandidateType()[trackCandidateIndex] = trackCandidateType; + cands.directObjectIndices()[trackCandidateIndex] = directObjectIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = innerTrackletIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = outerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][0] = innerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = outerTrackletIndex; size_t limits = trackCandidateType == 7 ? Params_pT5::kLayers @@ -104,15 +67,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //send the starting pointer to the logicalLayer and hitIndices for (size_t i = 0; i < limits; i++) { - trackCandidatesInGPU.logicalLayers[Params_pT5::kLayers * trackCandidateIndex + i] = logicalLayerIndices[i]; - trackCandidatesInGPU.lowerModuleIndices[Params_pT5::kLayers * trackCandidateIndex + i] = lowerModuleIndices[i]; + cands.logicalLayers()[trackCandidateIndex][i] = logicalLayerIndices[i]; + cands.lowerModuleIndices()[trackCandidateIndex][i] = lowerModuleIndices[i]; } for (size_t i = 0; i < 2 * limits; i++) { - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + i] = hitIndices[i]; + cands.hitIndices()[trackCandidateIndex][i] = hitIndices[i]; } - trackCandidatesInGPU.centerX[trackCandidateIndex] = __F2H(centerX); - trackCandidatesInGPU.centerY[trackCandidateIndex] = __F2H(centerY); - trackCandidatesInGPU.radius[trackCandidateIndex] = __F2H(radius); + cands.centerX()[trackCandidateIndex] = __F2H(centerX); + cands.centerY()[trackCandidateIndex] = __F2H(centerY); + cands.radius()[trackCandidateIndex] = __F2H(radius); } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, @@ -257,7 +220,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules modulesInGPU, ObjectRanges rangesInGPU, PixelTriplets pixelTripletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, MiniDoublets mdsInGPU, Hits hitsInGPU, @@ -276,11 +239,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float phi1 = segmentsInGPU.phi[pixelArrayIndex]; unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - unsigned int nTrackCandidates = *(trackCandidatesInGPU.nTrackCandidates); + unsigned int nTrackCandidates = cands.nTrackCandidates(); for (unsigned int trackCandidateIndex = globalThreadIdx[1]; trackCandidateIndex < nTrackCandidates; trackCandidateIndex += gridThreadExtent[1]) { - short type = trackCandidatesInGPU.trackCandidateType[trackCandidateIndex]; - unsigned int innerTrackletIdx = trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex]; + short type = cands.trackCandidateType()[trackCandidateIndex]; + unsigned int innerTrackletIdx = cands.objectIndices()[trackCandidateIndex][0]; if (type == 4) // T5 { unsigned int quintupletIndex = innerTrackletIdx; // T5 index @@ -337,7 +300,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, PixelTriplets pixelTripletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block @@ -355,22 +318,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // This is done before any non-pixel TCs are added { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pT3"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT3, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT3(), 1u, alpaka::hierarchy::Threads{}); float radius = 0.5f * (__H2F(pixelTripletsInGPU.pixelRadius[pixelTripletIndex]) + __H2F(pixelTripletsInGPU.tripletRadius[pixelTripletIndex])); unsigned int pT3PixelIndex = pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex]; - addTrackCandidateToMemory(trackCandidatesInGPU, + addTrackCandidateToMemory(cands, 5 /*track candidate type pT3=5*/, pixelTripletIndex, pixelTripletIndex, @@ -393,7 +356,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, Quintuplets quintupletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -411,19 +374,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); - if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatespT5 - - *trackCandidatesInGPU.nTrackCandidatespT3 >= + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatespT5() - cands.nTrackCandidatespT3() >= n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs { #ifdef WARNINGS printf("Track Candidate excess alert! Type = T5"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatesT5, 1u, alpaka::hierarchy::Threads{}); - addTrackCandidateToMemory(trackCandidatesInGPU, + alpaka::atomicAdd(acc, &cands.nTrackCandidatesT5(), 1u, alpaka::hierarchy::Threads{}); + addTrackCandidateToMemory(cands, 4 /*track candidate type T5=4*/, quintupletIndex, quintupletIndex, @@ -446,7 +408,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, bool tc_pls_triplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -459,19 +421,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); - if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatesT5 >= + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatesT5() >= n_max_pixel_track_candidates) // T5 TCs have already been added { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pLS"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespLS, 1u, alpaka::hierarchy::Threads{}); - addpLSTrackCandidateToMemory(trackCandidatesInGPU, + alpaka::atomicAdd(acc, &cands.nTrackCandidatespLS(), 1u, alpaka::hierarchy::Threads{}); + addpLSTrackCandidateToMemory(cands, pixelArrayIndex, trackCandidateIdx, segmentsInGPU.pLSHitsIdxs[pixelArrayIndex], @@ -486,7 +448,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, PixelQuintuplets pixelQuintupletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block @@ -504,23 +466,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pT5"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT5, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT5(), 1u, alpaka::hierarchy::Threads{}); float radius = 0.5f * (__H2F(pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex]) + __H2F(pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex])); unsigned int pT5PixelIndex = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; addTrackCandidateToMemory( - trackCandidatesInGPU, + cands, 7 /*track candidate type pT5=7*/, pT5PixelIndex, pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex], From a296da7f2b7112941fe05eb185c44ce37df9f87e Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Tue, 1 Oct 2024 15:46:02 -0700 Subject: [PATCH 8/9] get rid of TrackCandidates* trackCandidatesD_ and simplify syntax --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 108 ++++++++++---------- RecoTracker/LSTCore/src/alpaka/Event.h | 1 - 2 files changed, 52 insertions(+), 57 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 090f8608e1e02..df17991d9a6ce 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -63,7 +63,6 @@ void Event::resetEventSync() { tripletsBuffers_.reset(); quintupletsInGPU_.reset(); quintupletsBuffers_.reset(); - trackCandidatesD_ = nullptr; trackCandidatesDC_.reset(); pixelTripletsInGPU_.reset(); pixelTripletsBuffers_.reset(); @@ -480,7 +479,6 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); auto buf = trackCandidatesDC_->buffer(); alpaka::memset(queue_, buf, 0u); - trackCandidatesD_ = &trackCandidatesDC_->view(); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; @@ -504,7 +502,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddpT3asTrackCandidatesInGPU{}, nLowerModules_, *pixelTripletsInGPU_, - *trackCandidatesD_, + trackCandidatesDC_->view(), *segmentsInGPU_, *rangesInGPU_); @@ -549,7 +547,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU_, - *trackCandidatesD_, + trackCandidatesDC_->view(), *rangesInGPU_); if (!no_pls_dupclean) { @@ -572,7 +570,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { *modulesBuffers_.data(), *rangesInGPU_, *pixelTripletsInGPU_, - *trackCandidatesD_, + trackCandidatesDC_->view(), *segmentsInGPU_, *mdsInGPU_, *hitsInGPU_, @@ -587,7 +585,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { addpLSasTrackCandidateInGPU_workDiv, AddpLSasTrackCandidateInGPU{}, nLowerModules_, - *trackCandidatesD_, + trackCandidatesDC_->view(), *segmentsInGPU_, tc_pls_triplets); @@ -597,13 +595,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); alpaka::memcpy( - queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT5(), 1u)); + queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT5(), 1u)); alpaka::memcpy( - queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT3(), 1u)); + queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT3(), 1u)); alpaka::memcpy( - queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespLS(), 1u)); + queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespLS(), 1u)); alpaka::memcpy( - queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatesT5(), 1u)); + queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); // wait to get the values before using them auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); @@ -828,7 +826,6 @@ void Event::createPixelQuintuplets() { trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); auto buf = trackCandidatesDC_->buffer(); alpaka::memset(queue_, buf, 0u); - trackCandidatesD_ = &trackCandidatesDC_->view(); } auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); @@ -937,7 +934,7 @@ void Event::createPixelQuintuplets() { AddpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU_, - *trackCandidatesD_, + trackCandidatesDC_->view(), *segmentsInGPU_, *rangesInGPU_); @@ -1216,7 +1213,7 @@ int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); alpaka::wait(queue_); return *nTrackCandidates_buf_h.data(); @@ -1225,8 +1222,9 @@ int Event::getNumberOfTrackCandidates() { int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy( - queue_, nTrackCandidatesPT5_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT5(), 1u)); + alpaka::memcpy(queue_, + nTrackCandidatesPT5_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT5_buf_h.data(); @@ -1235,8 +1233,9 @@ int Event::getNumberOfPT5TrackCandidates() { int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy( - queue_, nTrackCandidatesPT3_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespT3(), 1u)); + alpaka::memcpy(queue_, + nTrackCandidatesPT3_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT3(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT3_buf_h.data(); @@ -1245,8 +1244,9 @@ int Event::getNumberOfPT3TrackCandidates() { int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy( - queue_, nTrackCandidatesPLS_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatespLS(), 1u)); + alpaka::memcpy(queue_, + nTrackCandidatesPLS_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespLS(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPLS_buf_h.data(); @@ -1257,9 +1257,9 @@ int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); alpaka::memcpy( - queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatesT5(), 1u)); + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); @@ -1269,7 +1269,7 @@ int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidatesT5(), 1u)); + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesT5_buf_h.data(); @@ -1558,41 +1558,39 @@ const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost; + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), - trackCandidatesHC_->view().hitIndices()->data(), - Params_pT5::kHits * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); - alpaka::memcpy( - queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->pixelSeedIndex(), nTrackCanHost)); + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); + alpaka::memcpy(queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); + alpaka::memcpy(queue_, + alpaka::createView(cms::alpakatools::host(), + (*trackCandidatesHC_)->logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + alpaka::createView( + devAcc_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), - trackCandidatesHC_->view().logicalLayers()->data(), - Params_pT5::kLayers * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().directObjectIndices(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->directObjectIndices(), nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView( - cms::alpakatools::host(), trackCandidatesHC_->view().objectIndices()->data(), 2 * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->objectIndices()->data(), 2 * nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->trackCandidateType(), nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } @@ -1604,27 +1602,25 @@ const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( - queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &trackCandidatesD_->nTrackCandidates(), 1u)); + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost; + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), - trackCandidatesHC_->view().hitIndices()->data(), - Params_pT5::kHits * nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); - alpaka::memcpy( - queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->pixelSeedIndex(), nTrackCanHost)); + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); + alpaka::memcpy(queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); alpaka::memcpy( queue_, - alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost), - alpaka::createView(devAcc_, trackCandidatesD_->trackCandidateType(), nTrackCanHost)); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 3ad1c79cd7ae7..bf63acf55a9bd 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -55,7 +55,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional> tripletsBuffers_; std::optional quintupletsInGPU_; std::optional> quintupletsBuffers_; - TrackCandidates* trackCandidatesD_; // not owned, view from trackCandidatesDC_ std::optional trackCandidatesDC_; std::optional pixelTripletsInGPU_; std::optional> pixelTripletsBuffers_; From 69804f049b30839427a81989052e36e825b08cd3 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Fri, 4 Oct 2024 16:04:19 -0700 Subject: [PATCH 9/9] move TrackCandidatesDeviceCollection and related aliases to interface/alpaka --- RecoTracker/LSTCore/interface/TrackCandidatesSoA.h | 11 ++--------- .../alpaka/TrackCandidatesDeviceCollection.h | 10 ++++++++++ RecoTracker/LSTCore/src/alpaka/TrackCandidate.h | 8 +------- 3 files changed, 13 insertions(+), 16 deletions(-) create mode 100644 RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h index 06b138b3f728f..18bea1e51c6ba 100644 --- a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -26,14 +26,7 @@ namespace lst { SOA_SCALAR(unsigned int, nTrackCandidatesT5)) // using TrackCandidatesSoA = TrackCandidatesSoALayout<>; - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void initScalars(TrackCandidatesSoA::View& v) { - v.nTrackCandidates() = 0; - v.nTrackCandidatesT5() = 0; - v.nTrackCandidatespT3() = 0; - v.nTrackCandidatespT5() = 0; - v.nTrackCandidatespLS() = 0; - } - + using TrackCandidates = TrackCandidatesSoA::View; + using TrackCandidatesConst = TrackCandidatesSoA::ConstView; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h new file mode 100644 index 0000000000000..057d86180d967 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TrackCandidatesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 1622f746893c7..204aae25dced5 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -1,12 +1,11 @@ #ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h #define RecoTracker_LSTCore_src_alpaka_TrackCandidate_h -#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" - #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" #include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" #include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" #include "Triplet.h" #include "Segment.h" @@ -17,11 +16,6 @@ #include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using TrackCandidatesDeviceCollection = PortableCollection<::lst::TrackCandidatesSoA>; - - using TrackCandidates = ::lst::TrackCandidatesSoA::View; - using TrackCandidatesConst = ::lst::TrackCandidatesSoA::ConstView; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& cands, unsigned int trackletIndex, unsigned int trackCandidateIndex,