diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h index 350857ac0b2e5..00a2c83a1ce29 100644 --- a/RecoTracker/LSTCore/interface/Constants.h +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -2,6 +2,15 @@ #define RecoTracker_LSTCore_interface_Constants_h #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Common/interface/StdArray.h" + +#if defined(FP16_Base) +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED +#include +#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include +#endif +#endif #ifdef CACHE_ALLOC #include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h" @@ -55,6 +64,17 @@ namespace lst { constexpr unsigned int size_superbins = 45000; +// Half precision wrapper functions. +#if defined(FP16_Base) +#define __F2H __float2half +#define __H2F __half2float + typedef __half float FPX; +#else +#define __F2H +#define __H2F + typedef float FPX; +#endif + // Defining the constant host device variables right up here // Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future. struct Params_pLS { @@ -74,8 +94,13 @@ namespace lst { }; struct Params_pT5 { static constexpr int kLayers = 7, kHits = 14; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; }; + using ArrayUx2 = edm::StdArray; + } //namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h new file mode 100644 index 0000000000000..3ffd2bedf945e --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TrackCandidatesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h new file mode 100644 index 0000000000000..18bea1e51c6ba --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -0,0 +1,32 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesSoA_h +#define RecoTracker_LSTCore_interface_TrackCandidatesSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout, + SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS + SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers + SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // + SOA_COLUMN(int, pixelSeedIndex), // + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // + SOA_COLUMN(FPX, centerX), // + SOA_COLUMN(FPX, centerY), // + SOA_COLUMN(FPX, radius), // + SOA_SCALAR(unsigned int, nTrackCandidates), // + SOA_SCALAR(unsigned int, nTrackCandidatespT3), // + SOA_SCALAR(unsigned int, nTrackCandidatespT5), // + SOA_SCALAR(unsigned int, nTrackCandidatespLS), // + SOA_SCALAR(unsigned int, nTrackCandidatesT5)) // + + using TrackCandidatesSoA = TrackCandidatesSoALayout<>; + using TrackCandidates = TrackCandidatesSoA::View; + using TrackCandidatesConst = TrackCandidatesSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 1a16dad68420e..4477c5232608b 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -3,27 +3,10 @@ #include "RecoTracker/LSTCore/interface/Constants.h" -#if defined ALPAKA_ACC_GPU_CUDA_ENABLED -#include -#elif defined ALPAKA_ACC_GPU_HIP_ENABLED -#include -#endif - namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { using namespace ::lst; -// Half precision wrapper functions. -#if defined(FP16_Base) -#define __F2H __float2half -#define __H2F __half2float - typedef __half float FPX; -#else -#define __F2H -#define __H2F - typedef float FPX; -#endif - Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); // Needed for files that are compiled by g++ to not throw an error. diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h index 1f3c08804540f..df1319462432e 100644 --- a/RecoTracker/LSTCore/interface/alpaka/LST.h +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -66,10 +66,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::vector const& ph2_z); void getOutput(Event& event); - std::vector getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices); // Input and output vectors std::vector in_trkX_; diff --git a/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h new file mode 100644 index 0000000000000..057d86180d967 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TrackCandidatesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index a575494d7ba07..d05a2e909f690 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -62,8 +62,7 @@ void Event::resetEventSync() { tripletsBuffers_.reset(); quintupletsInGPU_.reset(); quintupletsBuffers_.reset(); - trackCandidatesInGPU_.reset(); - trackCandidatesBuffers_.reset(); + trackCandidatesDC_.reset(); pixelTripletsInGPU_.reset(); pixelTripletsBuffers_.reset(); pixelQuintupletsInGPU_.reset(); @@ -77,7 +76,7 @@ void Event::resetEventSync() { quintupletsInCPU_.reset(); pixelTripletsInCPU_.reset(); pixelQuintupletsInCPU_.reset(); - trackCandidatesInCPU_.reset(); + trackCandidatesHC_.reset(); modulesInCPU_.reset(); } @@ -487,10 +486,10 @@ void Event::createTriplets() { } void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { - if (!trackCandidatesInGPU_) { - trackCandidatesInGPU_.emplace(); - trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); - trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; @@ -514,7 +513,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddpT3asTrackCandidatesInGPU{}, nLowerModules_, *pixelTripletsInGPU_, - *trackCandidatesInGPU_, + trackCandidatesDC_->view(), *segmentsInGPU_, *rangesInGPU_); @@ -559,7 +558,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU_, - *trackCandidatesInGPU_, + trackCandidatesDC_->view(), *rangesInGPU_); if (!no_pls_dupclean) { @@ -582,7 +581,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { *modulesBuffers_.data(), *rangesInGPU_, *pixelTripletsInGPU_, - *trackCandidatesInGPU_, + trackCandidatesDC_->view(), *segmentsInGPU_, mdsDev_->const_view(), *hitsInGPU_, @@ -597,7 +596,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { addpLSasTrackCandidateInGPU_workDiv, AddpLSasTrackCandidateInGPU{}, nLowerModules_, - *trackCandidatesInGPU_, + trackCandidatesDC_->view(), *segmentsInGPU_, tc_pls_triplets); @@ -606,10 +605,14 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { auto nTrackCanpT3Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nTrackCanpT5Host_buf, trackCandidatesBuffers_->nTrackCandidatespT5_buf); - alpaka::memcpy(queue_, nTrackCanpT3Host_buf, trackCandidatesBuffers_->nTrackCandidatespT3_buf); - alpaka::memcpy(queue_, nTrackCanpLSHost_buf, trackCandidatesBuffers_->nTrackCandidatespLS_buf); - alpaka::memcpy(queue_, nTrackCanT5Host_buf, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT5(), 1u)); + alpaka::memcpy( + queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT3(), 1u)); + alpaka::memcpy( + queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespLS(), 1u)); + alpaka::memcpy( + queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); // wait to get the values before using them auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); @@ -830,10 +833,10 @@ void Event::createPixelQuintuplets() { pixelQuintupletsBuffers_.emplace(n_max_pixel_quintuplets, devAcc_, queue_); pixelQuintupletsInGPU_->setData(*pixelQuintupletsBuffers_); } - if (!trackCandidatesInGPU_) { - trackCandidatesInGPU_.emplace(); - trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); - trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); } auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); @@ -942,7 +945,7 @@ void Event::createPixelQuintuplets() { AddpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU_, - *trackCandidatesInGPU_, + trackCandidatesDC_->view(), *segmentsInGPU_, *rangesInGPU_); @@ -1221,7 +1224,8 @@ unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + alpaka::memcpy( + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); alpaka::wait(queue_); return *nTrackCandidates_buf_h.data(); @@ -1230,7 +1234,9 @@ int Event::getNumberOfTrackCandidates() { int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers_->nTrackCandidatespT5_buf); + alpaka::memcpy(queue_, + nTrackCandidatesPT5_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT5_buf_h.data(); @@ -1239,7 +1245,9 @@ int Event::getNumberOfPT5TrackCandidates() { int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers_->nTrackCandidatespT3_buf); + alpaka::memcpy(queue_, + nTrackCandidatesPT3_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespT3(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPT3_buf_h.data(); @@ -1248,7 +1256,9 @@ int Event::getNumberOfPT3TrackCandidates() { int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers_->nTrackCandidatespLS_buf); + alpaka::memcpy(queue_, + nTrackCandidatesPLS_buf_h, + alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatespLS(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesPLS_buf_h.data(); @@ -1258,8 +1268,10 @@ int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); @@ -1268,7 +1280,8 @@ int Event::getNumberOfPixelTrackCandidates() { int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::memcpy( + queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidatesT5(), 1u)); alpaka::wait(queue_); return *nTrackCandidatesT5_buf_h.data(); @@ -1547,74 +1560,78 @@ PixelQuintupletsBuffer& Event::getPixelQuintuplets(bool return pixelQuintupletsInCPU_.value(); } -TrackCandidatesBuffer& Event::getTrackCandidates(bool sync) { - if (!trackCandidatesInCPU_) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ +const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - trackCandidatesInCPU_.emplace( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); - trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); - alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU_ becomes usable + alpaka::memcpy( + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); + trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue_, - trackCandidatesInCPU_->hitIndices_buf, - trackCandidatesBuffers_->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; alpaka::memcpy( - queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); - alpaka::memcpy(queue_, - trackCandidatesInCPU_->logicalLayers_buf, - trackCandidatesBuffers_->logicalLayers_buf, - Params_pT5::kLayers * nTrackCanHost); + queue_, + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->directObjectIndices_buf, - trackCandidatesBuffers_->directObjectIndices_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->objectIndices_buf, - trackCandidatesBuffers_->objectIndices_buf, - 2 * nTrackCanHost); - alpaka::memcpy(queue_, - trackCandidatesInCPU_->trackCandidateType_buf, - trackCandidatesBuffers_->trackCandidateType_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), + (*trackCandidatesHC_)->logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + alpaka::createView( + devAcc_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU_.value(); + return trackCandidatesHC_.value(); } -TrackCandidatesBuffer& Event::getTrackCandidatesInCMSSW(bool sync) { - if (!trackCandidatesInCPU_) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ +const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); - trackCandidatesInCPU_.emplace( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); - trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); - alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU_ becomes usable + alpaka::memcpy( + queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); + trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue_, - trackCandidatesInCPU_->hitIndices_buf, - trackCandidatesBuffers_->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; alpaka::memcpy( - queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + queue_, + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); alpaka::memcpy(queue_, - trackCandidatesInCPU_->trackCandidateType_buf, - trackCandidatesBuffers_->trackCandidateType_buf, - nTrackCanHost); + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU_.value(); + return trackCandidatesHC_.value(); } ModulesBuffer& Event::getModules(bool isFull, bool sync) { diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 598147ef9b2a3..c07c7003d870a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -3,6 +3,7 @@ #include +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "RecoTracker/LSTCore/interface/Module.h" @@ -53,8 +54,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional> tripletsBuffers_; std::optional quintupletsInGPU_; std::optional> quintupletsBuffers_; - std::optional trackCandidatesInGPU_; - std::optional> trackCandidatesBuffers_; + std::optional trackCandidatesDC_; std::optional pixelTripletsInGPU_; std::optional> pixelTripletsBuffers_; std::optional pixelQuintupletsInGPU_; @@ -66,7 +66,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional mdsHost_; std::optional> segmentsInCPU_; std::optional> tripletsInCPU_; - std::optional> trackCandidatesInCPU_; + std::optional trackCandidatesHC_; std::optional> modulesInCPU_; std::optional> quintupletsInCPU_; std::optional> pixelTripletsInCPU_; @@ -189,8 +189,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { QuintupletsBuffer& getQuintuplets(bool sync = true); PixelTripletsBuffer& getPixelTriplets(bool sync = true); PixelQuintupletsBuffer& getPixelQuintuplets(bool sync = true); - TrackCandidatesBuffer& getTrackCandidates(bool sync = true); - TrackCandidatesBuffer& getTrackCandidatesInCMSSW(bool sync = true); + const TrackCandidatesHostCollection& getTrackCandidates(bool sync = true); + const TrackCandidatesHostCollection& getTrackCandidatesInCMSSW(bool sync = true); ModulesBuffer& getModules(bool isFull = false, bool sync = true); }; diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 65543720a1d34..2c5cfd499c7e0 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -18,6 +18,42 @@ namespace { const float vy = dxy * p3.x() / pt - p3.y() / p * p3.z() / p * dz; return {vx, vy, vz}; } + + using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + std::vector getHitIdxs(short trackCandidateType, + Params_pT5::ArrayUxHits const& tcHitIndices, + unsigned int const* hitIndices) { + std::vector hits; + + unsigned int maxNHits = 0; + if (trackCandidateType == 7) + maxNHits = Params_pT5::kHits; // pT5 + else if (trackCandidateType == 5) + maxNHits = Params_pT3::kHits; // pT3 + else if (trackCandidateType == 4) + maxNHits = Params_T5::kHits; // T5 + else if (trackCandidateType == 8) + maxNHits = Params_pLS::kHits; // pLS + + for (unsigned int i = 0; i < maxNHits; i++) { + unsigned int hitIdxInGPU = tcHitIndices[i]; + unsigned int hitIdx = + (trackCandidateType == 8) + ? hitIdxInGPU + : hitIndices[hitIdxInGPU]; // Hit indices are stored differently in the standalone for pLS. + + // For p objects, the 3rd and 4th hit maybe the same, + // due to the way pLS hits are stored in the standalone. + // This is because pixel seeds can be either triplets or quadruplets. + if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. + continue; + + hits.push_back(hitIdx); + } + + return hits; + } + } // namespace void LST::prepareInput(std::vector const& see_px, @@ -212,60 +248,25 @@ void LST::prepareInput(std::vector const& see_px, in_isQuad_vec_ = isQuad_vec; } -std::vector LST::getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices) { - std::vector hits; - - unsigned int maxNHits = 0; - if (trackCandidateType == 7) - maxNHits = Params_pT5::kHits; // pT5 - else if (trackCandidateType == 5) - maxNHits = Params_pT3::kHits; // pT3 - else if (trackCandidateType == 4) - maxNHits = Params_T5::kHits; // T5 - else if (trackCandidateType == 8) - maxNHits = Params_pLS::kHits; // pLS - - for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; - unsigned int hitIdx = - (trackCandidateType == 8) - ? hitIdxInGPU - : hitIndices[hitIdxInGPU]; // Hit indices are stored differently in the standalone for pLS. - - // For p objects, the 3rd and 4th hit maybe the same, - // due to the way pLS hits are stored in the standalone. - // This is because pixel seeds can be either triplets or quadruplets. - if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. - continue; - - hits.push_back(hitIdx); - } - - return hits; -} - void LST::getOutput(Event& event) { std::vector> tc_hitIdxs; std::vector tc_len; std::vector tc_seedIdx; std::vector tc_trackCandidateType; - HitsBuffer& hitsInGPU = event.getHitsInCMSSW(false); // sync on next line - TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW().data(); + HitsBuffer& hitsBuffer = event.getHitsInCMSSW(false); // sync on next line + auto const& trackCandidates = event.getTrackCandidatesInCMSSW().const_view(); - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { - short trackCandidateType = trackCandidates->trackCandidateType[idx]; + short trackCandidateType = trackCandidates.trackCandidateType()[idx]; std::vector hit_idx = - getHitIdxs(trackCandidateType, idx, trackCandidates->hitIndices, hitsInGPU.data()->idxs); + getHitIdxs(trackCandidateType, trackCandidates.hitIndices()[idx], hitsBuffer.data()->idxs); tc_hitIdxs.push_back(hit_idx); tc_len.push_back(hit_idx.size()); - tc_seedIdx.push_back(trackCandidates->pixelSeedIndex[idx]); + tc_seedIdx.push_back(trackCandidates.pixelSeedIndex()[idx]); tc_trackCandidateType.push_back(trackCandidateType); } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 8a4f85f8a566b..cd267c57a91d0 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -3,6 +3,9 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" #include "Triplet.h" #include "Segment.h" @@ -13,121 +16,26 @@ #include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct TrackCandidates { - short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS - unsigned int* directObjectIndices; // Will hold direct indices to each type containers - unsigned int* objectIndices; // Will hold tracklet and triplet indices - check the type!! - unsigned int* nTrackCandidates; - unsigned int* nTrackCandidatespT3; - unsigned int* nTrackCandidatespT5; - unsigned int* nTrackCandidatespLS; - unsigned int* nTrackCandidatesT5; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - int* pixelSeedIndex; - uint16_t* lowerModuleIndices; - - FPX* centerX; - FPX* centerY; - FPX* radius; - - template - void setData(TBuff& buf) { - trackCandidateType = buf.trackCandidateType_buf.data(); - directObjectIndices = buf.directObjectIndices_buf.data(); - objectIndices = buf.objectIndices_buf.data(); - nTrackCandidates = buf.nTrackCandidates_buf.data(); - nTrackCandidatespT3 = buf.nTrackCandidatespT3_buf.data(); - nTrackCandidatespT5 = buf.nTrackCandidatespT5_buf.data(); - nTrackCandidatespLS = buf.nTrackCandidatespLS_buf.data(); - nTrackCandidatesT5 = buf.nTrackCandidatesT5_buf.data(); - - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - pixelSeedIndex = buf.pixelSeedIndex_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - radius = buf.radius_buf.data(); - } - }; - - template - struct TrackCandidatesBuffer { - Buf trackCandidateType_buf; - Buf directObjectIndices_buf; - Buf objectIndices_buf; - Buf nTrackCandidates_buf; - Buf nTrackCandidatespT3_buf; - Buf nTrackCandidatespT5_buf; - Buf nTrackCandidatespLS_buf; - Buf nTrackCandidatesT5_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf pixelSeedIndex_buf; - Buf lowerModuleIndices_buf; - - Buf centerX_buf; - Buf centerY_buf; - Buf radius_buf; - - TrackCandidates data_; - - template - TrackCandidatesBuffer(unsigned int maxTrackCandidates, TDevAcc const& devAccIn, TQueue& queue) - : trackCandidateType_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - directObjectIndices_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - objectIndices_buf(allocBufWrapper(devAccIn, 2 * maxTrackCandidates, queue)), - nTrackCandidates_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespT3_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespT5_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatespLS_buf(allocBufWrapper(devAccIn, 1, queue)), - nTrackCandidatesT5_buf(allocBufWrapper(devAccIn, 1, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kHits * maxTrackCandidates, queue)), - pixelSeedIndex_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), - radius_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)) { - alpaka::memset(queue, nTrackCandidates_buf, 0u); - alpaka::memset(queue, nTrackCandidatesT5_buf, 0u); - alpaka::memset(queue, nTrackCandidatespT3_buf, 0u); - alpaka::memset(queue, nTrackCandidatespT5_buf, 0u); - alpaka::memset(queue, nTrackCandidatespLS_buf, 0u); - alpaka::memset(queue, logicalLayers_buf, 0u); - alpaka::memset(queue, lowerModuleIndices_buf, 0u); - alpaka::memset(queue, hitIndices_buf, 0u); - alpaka::memset(queue, pixelSeedIndex_buf, 0); - } - - inline TrackCandidates const* data() const { return &data_; } - inline void setData(TrackCandidatesBuffer& buf) { data_.setData(buf); } - }; - - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& cands, unsigned int trackletIndex, unsigned int trackCandidateIndex, uint4 hitIndices, int pixelSeedIndex) { - trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = 8; // type for pLS - trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = trackletIndex; - trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + cands.trackCandidateType()[trackCandidateIndex] = 8; // type for pLS + cands.directObjectIndices()[trackCandidateIndex] = trackletIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = trackletIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][0] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = trackletIndex; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 0] = + cands.hitIndices()[trackCandidateIndex][0] = hitIndices.x; // Order explanation in https://github.com/SegmentLinking/TrackLooper/issues/267 - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 1] = hitIndices.z; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 2] = hitIndices.y; - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; + cands.hitIndices()[trackCandidateIndex][1] = hitIndices.z; + cands.hitIndices()[trackCandidateIndex][2] = hitIndices.y; + cands.hitIndices()[trackCandidateIndex][3] = hitIndices.w; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& cands, short trackCandidateType, unsigned int innerTrackletIndex, unsigned int outerTrackletIndex, @@ -140,12 +48,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float radius, unsigned int trackCandidateIndex, unsigned int directObjectIndex) { - trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = trackCandidateType; - trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = directObjectIndex; - trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + cands.trackCandidateType()[trackCandidateIndex] = trackCandidateType; + cands.directObjectIndices()[trackCandidateIndex] = directObjectIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = innerTrackletIndex; - trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = outerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][0] = innerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = outerTrackletIndex; size_t limits = trackCandidateType == 7 ? Params_pT5::kLayers @@ -153,15 +61,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //send the starting pointer to the logicalLayer and hitIndices for (size_t i = 0; i < limits; i++) { - trackCandidatesInGPU.logicalLayers[Params_pT5::kLayers * trackCandidateIndex + i] = logicalLayerIndices[i]; - trackCandidatesInGPU.lowerModuleIndices[Params_pT5::kLayers * trackCandidateIndex + i] = lowerModuleIndices[i]; + cands.logicalLayers()[trackCandidateIndex][i] = logicalLayerIndices[i]; + cands.lowerModuleIndices()[trackCandidateIndex][i] = lowerModuleIndices[i]; } for (size_t i = 0; i < 2 * limits; i++) { - trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + i] = hitIndices[i]; + cands.hitIndices()[trackCandidateIndex][i] = hitIndices[i]; } - trackCandidatesInGPU.centerX[trackCandidateIndex] = __F2H(centerX); - trackCandidatesInGPU.centerY[trackCandidateIndex] = __F2H(centerY); - trackCandidatesInGPU.radius[trackCandidateIndex] = __F2H(radius); + cands.centerX()[trackCandidateIndex] = __F2H(centerX); + cands.centerY()[trackCandidateIndex] = __F2H(centerY); + cands.radius()[trackCandidateIndex] = __F2H(radius); } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits( @@ -303,7 +211,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules modulesInGPU, ObjectRanges rangesInGPU, PixelTriplets pixelTripletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, MiniDoubletsConst mds, Hits hitsInGPU, @@ -322,11 +230,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float phi1 = segmentsInGPU.phi[pixelArrayIndex]; unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - unsigned int nTrackCandidates = *(trackCandidatesInGPU.nTrackCandidates); + unsigned int nTrackCandidates = cands.nTrackCandidates(); for (unsigned int trackCandidateIndex = globalThreadIdx[1]; trackCandidateIndex < nTrackCandidates; trackCandidateIndex += gridThreadExtent[1]) { - short type = trackCandidatesInGPU.trackCandidateType[trackCandidateIndex]; - unsigned int innerTrackletIdx = trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex]; + short type = cands.trackCandidateType()[trackCandidateIndex]; + unsigned int innerTrackletIdx = cands.objectIndices()[trackCandidateIndex][0]; if (type == 4) // T5 { unsigned int quintupletIndex = innerTrackletIdx; // T5 index @@ -383,7 +291,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, PixelTriplets pixelTripletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block @@ -401,22 +309,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // This is done before any non-pixel TCs are added { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pT3"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT3, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT3(), 1u, alpaka::hierarchy::Threads{}); float radius = 0.5f * (__H2F(pixelTripletsInGPU.pixelRadius[pixelTripletIndex]) + __H2F(pixelTripletsInGPU.tripletRadius[pixelTripletIndex])); unsigned int pT3PixelIndex = pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex]; - addTrackCandidateToMemory(trackCandidatesInGPU, + addTrackCandidateToMemory(cands, 5 /*track candidate type pT3=5*/, pixelTripletIndex, pixelTripletIndex, @@ -439,7 +347,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, Quintuplets quintupletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -457,19 +365,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); - if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatespT5 - - *trackCandidatesInGPU.nTrackCandidatespT3 >= + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatespT5() - cands.nTrackCandidatespT3() >= n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs { #ifdef WARNINGS printf("Track Candidate excess alert! Type = T5"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatesT5, 1u, alpaka::hierarchy::Threads{}); - addTrackCandidateToMemory(trackCandidatesInGPU, + alpaka::atomicAdd(acc, &cands.nTrackCandidatesT5(), 1u, alpaka::hierarchy::Threads{}); + addTrackCandidateToMemory(cands, 4 /*track candidate type T5=4*/, quintupletIndex, quintupletIndex, @@ -492,7 +399,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, bool tc_pls_triplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -505,19 +412,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); - if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatesT5 >= + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatesT5() >= n_max_pixel_track_candidates) // T5 TCs have already been added { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pLS"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespLS, 1u, alpaka::hierarchy::Threads{}); - addpLSTrackCandidateToMemory(trackCandidatesInGPU, + alpaka::atomicAdd(acc, &cands.nTrackCandidatespLS(), 1u, alpaka::hierarchy::Threads{}); + addpLSTrackCandidateToMemory(cands, pixelArrayIndex, trackCandidateIdx, segmentsInGPU.pLSHitsIdxs[pixelArrayIndex], @@ -532,7 +439,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, PixelQuintuplets pixelQuintupletsInGPU, - TrackCandidates trackCandidatesInGPU, + TrackCandidates cands, Segments segmentsInGPU, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block @@ -550,23 +457,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pT5"); #endif - alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT5, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT5(), 1u, alpaka::hierarchy::Threads{}); float radius = 0.5f * (__H2F(pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex]) + __H2F(pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex])); unsigned int pT5PixelIndex = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; addTrackCandidateToMemory( - trackCandidatesInGPU, + cands, 7 /*track candidate type pT5=7*/, pT5PixelIndex, pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex], @@ -584,4 +491,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(lst::TrackCandidatesDeviceCollection, lst::TrackCandidatesHostCollection); + #endif diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index c0e52d0a0d194..ca8126c8ae4c4 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -460,7 +460,9 @@ void run_lst() { if (ana.verbose == 5) { #pragma omp critical - { debugPrintOutlierMultiplicities(events.at(omp_get_thread_num())); } + { + // TODO: debugPrintOutlierMultiplicities + } } if (ana.do_write_ntuple) { diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index 4d82d4ae93f2f..d0496e7067703 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -410,11 +410,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(Event* event, unsigned int TC) { +std::vector getLSsFromTC(Event* event, unsigned int iTC) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[TC]; - unsigned int objidx = trackCandidates->directObjectIndices[TC]; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + short type = trackCandidates.trackCandidateType()[iTC]; + unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { case kpT5: return getLSsFrompT5(event, objidx); @@ -433,11 +433,11 @@ std::vector getLSsFromTC(Event* event, unsigned int TC) { //____________________________________________________________________________________________ std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(Event* event, - unsigned TC) { + unsigned iTC) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[TC]; - unsigned int objidx = trackCandidates->directObjectIndices[TC]; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + short type = trackCandidates.trackCandidateType()[iTC]; + unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { case kpT5: return getHitIdxsAndHitTypesFrompT5(event, objidx); diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index f96f08e1a931d..ef7b698c1afce 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -226,8 +226,8 @@ void setOutputBranches(Event* event) { std::vector> tc_matched_simIdx; // ============ Track candidates ============= - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Compute reco quantities of track candidate based on final object int type, isFake; @@ -506,7 +506,7 @@ void setGnnNtupleBranches(Event* event) { Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); ObjectRanges const* ranges = event->getRanges().data(); - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); std::set mds_used_in_sg; std::map md_index_map; @@ -521,7 +521,7 @@ void setGnnNtupleBranches(Event* event) { } std::set lss_used_in_true_tc; - unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Only consider true track candidates std::vector hitidxs; @@ -710,8 +710,8 @@ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { //________________________________________________________________________________________________________________________________ std::tuple> parseTrackCandidate(Event* event, unsigned int idx) { // Get the type of the track candidate - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - short type = trackCandidates->trackCandidateType[idx]; + auto const& trackCandidates = event->getTrackCandidates().const_view(); + short type = trackCandidates.trackCandidateType()[idx]; enum { pT5 = 7, pT3 = 5, T5 = 4, pLS = 8 }; @@ -744,7 +744,7 @@ std::tuple> parseTrackCandidate( std::tuple, std::vector> parsepT5(Event* event, unsigned int idx) { // Get relevant information - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Quintuplets const* quintuplets = event->getQuintuplets().data(); Segments const* segments = event->getSegments().data(); @@ -757,7 +757,7 @@ std::tuple, std::vectordirectObjectIndices[idx]; + unsigned int pT5 = trackCandidates.directObjectIndices()[idx]; unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5Index = getT5FrompT5(event, pT5); @@ -856,7 +856,7 @@ std::tuple, std::vector, std::vector> parsepT3(Event* event, unsigned int idx) { // Get relevant information - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Triplets const* triplets = event->getTriplets().data(); Segments const* segments = event->getSegments().data(); @@ -867,7 +867,7 @@ std::tuple, std::vectordirectObjectIndices[idx]; + unsigned int pT3 = trackCandidates.directObjectIndices()[idx]; unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); @@ -890,9 +890,9 @@ std::tuple, std::vector, std::vector> parseT5(Event* event, unsigned int idx) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Quintuplets const* quintuplets = event->getQuintuplets().data(); - unsigned int T5 = trackCandidates->directObjectIndices[idx]; + unsigned int T5 = trackCandidates.directObjectIndices()[idx]; std::vector hits = getHitsFromT5(event, T5); // @@ -924,11 +924,11 @@ std::tuple, std::vector, std::vector> parsepLS(Event* event, unsigned int idx) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); + auto const& trackCandidates = event->getTrackCandidates().const_view(); Segments const* segments = event->getSegments().data(); // Getting pLS index - unsigned int pLS = trackCandidates->directObjectIndices[idx]; + unsigned int pLS = trackCandidates.directObjectIndices()[idx]; // Getting pt eta and phi float pt = segments->ptIn[pLS]; @@ -1110,30 +1110,3 @@ void printT3s(Event* event) { } std::cout << "VALIDATION nTriplets: " << nTriplets << std::endl; } - -//________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(Event* event) { - TrackCandidates const* trackCandidates = event->getTrackCandidates().data(); - Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); - MiniDoubletsOccupancyConst miniDoubletsOccupancy = event->getMiniDoublets(); - Modules const* modules = event->getModules().data(); - ObjectRanges const* ranges = event->getRanges().data(); - //int nTrackCandidates = 0; - for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { - if (trackCandidates->nTrackCandidates[idx] > 50000) { - std::cout << " modules->detIds[modules->lowerModuleIndices[idx]]: " << modules->detIds[idx] << std::endl; - std::cout << " idx: " << idx - << " trackCandidates->nTrackCandidates[idx]: " << trackCandidates->nTrackCandidates[idx] << std::endl; - std::cout << " idx: " << idx << " triplets->nTriplets[idx]: " << triplets->nTriplets[idx] << std::endl; - unsigned int i = idx; //modules->lowerModuleIndices[idx]; - std::cout << " idx: " << idx << " i: " << i << " segments->nSegments[i]: " << segments->nSegments[i] << std::endl; - int nMD = miniDoubletsOccupancy.nMDs()[2 * idx] + miniDoubletsOccupancy.nMDs()[2 * idx + 1]; - std::cout << " idx: " << idx << " nMD: " << nMD << std::endl; - int nHits = 0; - nHits += ranges->hitRanges[4 * idx + 1] - ranges->hitRanges[4 * idx] + 1; - nHits += ranges->hitRanges[4 * idx + 3] - ranges->hitRanges[4 * idx + 2] + 1; - std::cout << " idx: " << idx << " nHits: " << nHits << std::endl; - } - } -} diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h index 7a25c0d3cbcc6..3f04ec59ad554 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h @@ -52,7 +52,4 @@ void printT3s(LSTEvent* event); void printT4s(LSTEvent* event); void printTCs(LSTEvent* event); -// Print anomalous multiplicities -void debugPrintOutlierMultiplicities(LSTEvent* event); - #endif