diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h
index 8fe8d99aa1b29..82e889be9a780 100644
--- a/RecoTracker/LSTCore/interface/Constants.h
+++ b/RecoTracker/LSTCore/interface/Constants.h
@@ -97,12 +97,21 @@ namespace lst {
   };
   struct Params_T3 {
     static constexpr int kLayers = 3, kHits = 6;
+    using ArrayU8xLayers = edm::StdArray<uint8_t, kLayers>;
+    using ArrayU16xLayers = edm::StdArray<uint16_t, kLayers>;
+    using ArrayUxHits = edm::StdArray<unsigned int, kHits>;
   };
   struct Params_pT3 {
     static constexpr int kLayers = 5, kHits = 10;
+    using ArrayU8xLayers = edm::StdArray<uint8_t, kLayers>;
+    using ArrayU16xLayers = edm::StdArray<uint16_t, kLayers>;
+    using ArrayUxHits = edm::StdArray<unsigned int, kHits>;
   };
   struct Params_T5 {
     static constexpr int kLayers = 5, kHits = 10;
+    using ArrayU8xLayers = edm::StdArray<uint8_t, kLayers>;
+    using ArrayU16xLayers = edm::StdArray<uint16_t, kLayers>;
+    using ArrayUxHits = edm::StdArray<unsigned int, kHits>;
   };
   struct Params_pT5 {
     static constexpr int kLayers = 7, kHits = 14;
diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h
new file mode 100644
index 0000000000000..afb2560680621
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h
+#define RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h
+
+#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+namespace lst {
+  using PixelQuintupletsHostCollection = PortableHostCollection<PixelQuintupletsSoA>;
+}  // namespace lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h
new file mode 100644
index 0000000000000..504594dae6d94
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h
@@ -0,0 +1,35 @@
+#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h
+#define RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h
+
+#include <alpaka/alpaka.hpp>
+#include "DataFormats/Common/interface/StdArray.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+#include "RecoTracker/LSTCore/interface/Constants.h"
+
+namespace lst {
+  GENERATE_SOA_LAYOUT(PixelQuintupletsSoALayout,
+                      SOA_COLUMN(unsigned int, pixelSegmentIndices),
+                      SOA_COLUMN(unsigned int, quintupletIndices),
+                      SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices),  // lower module index (OT part)
+                      SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers),        // layer ID
+                      SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices),              // hit indices
+                      SOA_COLUMN(float, rPhiChiSquared),         // chi2 from pLS to T5
+                      SOA_COLUMN(float, rPhiChiSquaredInwards),  // chi2 from T5 to pLS
+                      SOA_COLUMN(float, rzChiSquared),
+                      SOA_COLUMN(FPX, pixelRadius),       // pLS pt converted
+                      SOA_COLUMN(FPX, quintupletRadius),  // T5 circle
+                      SOA_COLUMN(FPX, eta),
+                      SOA_COLUMN(FPX, phi),
+                      SOA_COLUMN(FPX, score),    // used for ranking (in e.g. duplicate cleaning)
+                      SOA_COLUMN(FPX, centerX),  // T3-based circle center x
+                      SOA_COLUMN(FPX, centerY),  // T3-based circle center y
+                      SOA_COLUMN(bool, isDup),
+                      SOA_SCALAR(unsigned int, nPixelQuintuplets),
+                      SOA_SCALAR(unsigned int, totOccupancyPixelQuintuplets));
+
+  using PixelQuintupletsSoA = PixelQuintupletsSoALayout<>;
+  using PixelQuintuplets = PixelQuintupletsSoA::View;
+  using PixelQuintupletsConst = PixelQuintupletsSoA::ConstView;
+}  // namespace lst
+#endif
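For orientation, a minimal host-side sketch (not part of the patch; the helper name and the explicit zeroing are illustrative) of how a collection built on this layout is allocated and addressed through its generated view:

  #include <cstdint>

  #include "HeterogeneousCore/AlpakaInterface/interface/host.h"
  #include "RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h"

  // Illustrative sketch: allocate a host-side SoA block for up to maxPixelQuintuplets
  // candidates and clear the counters that the device code otherwise memsets.
  inline lst::PixelQuintupletsHostCollection makeEmptyPixelQuintuplets(int32_t maxPixelQuintuplets) {
    lst::PixelQuintupletsHostCollection coll(maxPixelQuintuplets, cms::alpakatools::host());
    auto view = coll.view();
    view.nPixelQuintuplets() = 0;             // SOA_SCALAR: one value for the whole collection
    view.totOccupancyPixelQuintuplets() = 0;  // SOA_SCALAR
    for (int32_t i = 0; i < view.metadata().size(); ++i)
      view.isDup()[i] = false;  // SOA_COLUMN: one entry per pT5 candidate
    return coll;
  }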
diff --git a/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h
new file mode 100644
index 0000000000000..67678e64bfc03
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h
+#define RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h
+
+#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+namespace lst {
+  using PixelTripletsHostCollection = PortableHostCollection<PixelTripletsSoA>;
+}  // namespace lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/PixelTripletsSoA.h b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h
new file mode 100644
index 0000000000000..bf940e2cd3bd0
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h
@@ -0,0 +1,39 @@
+#ifndef RecoTracker_LSTCore_interface_PixelTripletsSoA_h
+#define RecoTracker_LSTCore_interface_PixelTripletsSoA_h
+
+#include <alpaka/alpaka.hpp>
+#include "DataFormats/Common/interface/StdArray.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+#include "RecoTracker/LSTCore/interface/Constants.h"
+
+namespace lst {
+  GENERATE_SOA_LAYOUT(PixelTripletsSoALayout,
+                      SOA_COLUMN(unsigned int, pixelSegmentIndices),
+                      SOA_COLUMN(unsigned int, tripletIndices),
+                      SOA_COLUMN(Params_pT3::ArrayU16xLayers, lowerModuleIndices),  // lower module index (OT part)
+                      SOA_COLUMN(Params_pT3::ArrayU8xLayers, logicalLayers),        // layer ID
+                      SOA_COLUMN(Params_pT3::ArrayUxHits, hitIndices),              // hit indices
+                      SOA_COLUMN(float, rPhiChiSquared),         // chi2 from pLS to T3
+                      SOA_COLUMN(float, rPhiChiSquaredInwards),  // chi2 from T3 to pLS
+                      SOA_COLUMN(float, rzChiSquared),
+                      SOA_COLUMN(FPX, pixelRadius),    // pLS pt converted
+                      SOA_COLUMN(FPX, tripletRadius),  // T3 circle
+                      SOA_COLUMN(FPX, pt),
+                      SOA_COLUMN(FPX, eta),
+                      SOA_COLUMN(FPX, phi),
+                      SOA_COLUMN(FPX, eta_pix),  // eta from pLS
+                      SOA_COLUMN(FPX, phi_pix),  // phi from pLS
+                      SOA_COLUMN(FPX, score),    // used for ranking (in e.g. duplicate cleaning)
+                      SOA_COLUMN(FPX, centerX),  // T3-based circle center x
+                      SOA_COLUMN(FPX, centerY),  // T3-based circle center y
+                      SOA_COLUMN(bool, isDup),
+                      SOA_SCALAR(unsigned int, nPixelTriplets),
+                      SOA_SCALAR(unsigned int, totOccupancyPixelTriplets));
+
+  using PixelTripletsSoA = PixelTripletsSoALayout<>;
+  using PixelTriplets = PixelTripletsSoA::View;
+  using PixelTripletsConst = PixelTripletsSoA::ConstView;
+
+}  // namespace lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h
new file mode 100644
index 0000000000000..734ce03057be7
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_QuintupletsHostCollection_h
+#define RecoTracker_LSTCore_interface_QuintupletsHostCollection_h
+
+#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+namespace lst {
+  using QuintupletsHostCollection = PortableHostMultiCollection<QuintupletsSoA, QuintupletsOccupancySoA>;
+}  // namespace lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/QuintupletsSoA.h b/RecoTracker/LSTCore/interface/QuintupletsSoA.h
new file mode 100644
index 0000000000000..05da002e5e343
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/QuintupletsSoA.h
@@ -0,0 +1,46 @@
+#ifndef RecoTracker_LSTCore_interface_QuintupletsSoA_h
+#define RecoTracker_LSTCore_interface_QuintupletsSoA_h
+
+#include <alpaka/alpaka.hpp>
+#include "DataFormats/Common/interface/StdArray.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+#include "RecoTracker/LSTCore/interface/Constants.h"
+
+namespace lst {
+  GENERATE_SOA_LAYOUT(QuintupletsSoALayout,
+                      SOA_COLUMN(ArrayUx2, tripletIndices),                        // inner and outer triplet indices
+                      SOA_COLUMN(Params_T5::ArrayU16xLayers, lowerModuleIndices),  // lower module index in each layer
+                      SOA_COLUMN(Params_T5::ArrayU8xLayers, logicalLayers),        // layer ID
+                      SOA_COLUMN(Params_T5::ArrayUxHits, hitIndices),              // hit indices
+                      SOA_COLUMN(FPX, innerRadius),   // inner triplet circle radius
+                      SOA_COLUMN(FPX, bridgeRadius),  // "middle"/bridge triplet radius
+                      SOA_COLUMN(FPX, outerRadius),   // outer triplet radius
+                      SOA_COLUMN(FPX, pt),
+                      SOA_COLUMN(FPX, eta),
+                      SOA_COLUMN(FPX, phi),
+                      SOA_COLUMN(FPX, score_rphisum),  // r-phi based score
+                      SOA_COLUMN(char, isDup),         // duplicate flag
+                      SOA_COLUMN(bool, tightCutFlag),  // tight pass to be a TC
+                      SOA_COLUMN(bool, partOfPT5),
+                      SOA_COLUMN(float, regressionRadius),
+                      SOA_COLUMN(float, regressionG),
+                      SOA_COLUMN(float, regressionF),
+                      SOA_COLUMN(float, rzChiSquared),  // r-z only chi2
+                      SOA_COLUMN(float, chiSquared),
+                      SOA_COLUMN(float, nonAnchorChiSquared));
+
+  using QuintupletsSoA = QuintupletsSoALayout<>;
+  using Quintuplets = QuintupletsSoA::View;
+  using QuintupletsConst = QuintupletsSoA::ConstView;
+
+  GENERATE_SOA_LAYOUT(QuintupletsOccupancySoALayout,
+                      SOA_COLUMN(unsigned int, nQuintuplets),
+                      SOA_COLUMN(unsigned int, totOccupancyQuintuplets));
+
+  using QuintupletsOccupancySoA = QuintupletsOccupancySoALayout<>;
+  using QuintupletsOccupancy = QuintupletsOccupancySoA::View;
+  using QuintupletsOccupancyConst = QuintupletsOccupancySoA::ConstView;
+
+}  // namespace lst
+#endif
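Because the per-T5 columns and the per-module counters are two layouts of one PortableHostMultiCollection (see QuintupletsHostCollection above), each side is read through its own templated view; a read-only sketch, with the function name and loop purely illustrative:

  #include <cstdint>

  #include "RecoTracker/LSTCore/interface/QuintupletsHostCollection.h"

  // Illustrative sketch: sum the per-module T5 counts kept in the occupancy layout.
  inline unsigned int countQuintuplets(lst::QuintupletsHostCollection const& qc) {
    auto occupancy = qc.const_view<lst::QuintupletsOccupancySoA>();  // one entry per lower module
    unsigned int total = 0;
    for (int32_t mod = 0; mod < occupancy.metadata().size(); ++mod)
      total += occupancy.nQuintuplets()[mod];
    // Per-candidate columns (eta(), phi(), hitIndices(), ...) live in the other layout:
    // auto quintuplets = qc.const_view<lst::QuintupletsSoA>();
    return total;
  }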
diff --git a/RecoTracker/LSTCore/interface/TripletsHostCollection.h b/RecoTracker/LSTCore/interface/TripletsHostCollection.h
new file mode 100644
index 0000000000000..6eaebd97e5bf6
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/TripletsHostCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_TripletsHostCollection_h
+#define RecoTracker_LSTCore_interface_TripletsHostCollection_h
+
+#include "RecoTracker/LSTCore/interface/TripletsSoA.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+namespace lst {
+  using TripletsHostCollection = PortableHostMultiCollection<TripletsSoA, TripletsOccupancySoA>;
+}  // namespace lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/TripletsSoA.h b/RecoTracker/LSTCore/interface/TripletsSoA.h
new file mode 100644
index 0000000000000..e0407ef3a0912
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/TripletsSoA.h
@@ -0,0 +1,42 @@
+#ifndef RecoTracker_LSTCore_interface_TripletsSoA_h
+#define RecoTracker_LSTCore_interface_TripletsSoA_h
+
+#include <alpaka/alpaka.hpp>
+#include "DataFormats/Common/interface/StdArray.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+#include "RecoTracker/LSTCore/interface/Constants.h"
+
+namespace lst {
+  GENERATE_SOA_LAYOUT(TripletsSoALayout,
+                      SOA_COLUMN(ArrayUx2, segmentIndices),                        // inner and outer segment indices
+                      SOA_COLUMN(Params_T3::ArrayU16xLayers, lowerModuleIndices),  // lower module index in each layer
+                      SOA_COLUMN(Params_T3::ArrayU8xLayers, logicalLayers),        // layer ID
+                      SOA_COLUMN(Params_T3::ArrayUxHits, hitIndices),              // hit indices
+                      SOA_COLUMN(FPX, betaIn),     // beta/chord angle of the inner segment
+                      SOA_COLUMN(float, centerX),  // lower/anchor-hit based circle center x
+                      SOA_COLUMN(float, centerY),  // lower/anchor-hit based circle center y
+                      SOA_COLUMN(float, radius),   // lower/anchor-hit based circle radius
+#ifdef CUT_VALUE_DEBUG
+                      SOA_COLUMN(float, zOut),
+                      SOA_COLUMN(float, rtOut),
+                      SOA_COLUMN(float, betaInCut),
+#endif
+                      SOA_COLUMN(bool, partOfPT5),   // is it used in a pT5
+                      SOA_COLUMN(bool, partOfT5),    // is it used in a T5
+                      SOA_COLUMN(bool, partOfPT3));  // is it used in a pT3
+
+  using TripletsSoA = TripletsSoALayout<>;
+  using Triplets = TripletsSoA::View;
+  using TripletsConst = TripletsSoA::ConstView;
+
+  GENERATE_SOA_LAYOUT(TripletsOccupancySoALayout,
+                      SOA_COLUMN(unsigned int, nTriplets),
+                      SOA_COLUMN(unsigned int, totOccupancyTriplets));
+
+  using TripletsOccupancySoA = TripletsOccupancySoALayout<>;
+  using TripletsOccupancy = TripletsOccupancySoA::View;
+  using TripletsOccupancyConst = TripletsOccupancySoA::ConstView;
+
+}  // namespace lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h
new file mode 100644
index 0000000000000..e2553f7b42c50
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsDeviceCollection_h
+#define RecoTracker_LSTCore_interface_PixelQuintupletsDeviceCollection_h
+
+#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h"
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
+  using PixelQuintupletsDeviceCollection = PortableCollection<PixelQuintupletsSoA>;
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h
new file mode 100644
index 0000000000000..ac010b9028ac4
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_PixelTripletsDeviceCollection_h
+#define RecoTracker_LSTCore_interface_PixelTripletsDeviceCollection_h
+
+#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h"
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
+  using PixelTripletsDeviceCollection = PortableCollection<PixelTripletsSoA>;
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h
new file mode 100644
index 0000000000000..df1aa2e554e2d
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_QuintupletsDeviceCollection_h
+#define RecoTracker_LSTCore_interface_QuintupletsDeviceCollection_h
+
+#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h"
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
+  using QuintupletsDeviceCollection = PortableCollection2<QuintupletsSoA, QuintupletsOccupancySoA>;
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst
+#endif
diff --git a/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h
new file mode 100644
index 0000000000000..ea709a7d78efd
--- /dev/null
+++ b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_LSTCore_interface_TripletsDeviceCollection_h
+#define RecoTracker_LSTCore_interface_TripletsDeviceCollection_h
+
+#include "RecoTracker/LSTCore/interface/TripletsSoA.h"
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
+  using TripletsDeviceCollection = PortableCollection2<TripletsSoA, TripletsOccupancySoA>;
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst
+#endif
diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc
index 986271225b4a5..e64a9cda41609 100644
--- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc
+++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc
@@ -2,6 +2,14 @@
 
 #include "Event.h"
 
+#include "MiniDoublet.h"
+#include "PixelQuintuplet.h"
+#include "PixelTriplet.h"
+#include "Quintuplet.h"
+#include "Segment.h"
+#include "TrackCandidate.h"
+#include "Triplet.h"
+
 using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device;
 using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue;
 using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D;
@@ -57,24 +65,20 @@ void Event::resetEventSync() {
   rangesInGPU_.reset();
   rangesBuffers_.reset();
   segmentsDC_.reset();
-  tripletsInGPU_.reset();
-  tripletsBuffers_.reset();
-  quintupletsInGPU_.reset();
-  quintupletsBuffers_.reset();
+  tripletsDC_.reset();
+  quintupletsDC_.reset();
   trackCandidatesDC_.reset();
-  pixelTripletsInGPU_.reset();
-  pixelTripletsBuffers_.reset();
-  pixelQuintupletsInGPU_.reset();
-  pixelQuintupletsBuffers_.reset();
+  pixelTripletsDC_.reset();
+  pixelQuintupletsDC_.reset();
   hitsInCPU_.reset();
   rangesInCPU_.reset();
   miniDoubletsHC_.reset();
   segmentsHC_.reset();
-  tripletsInCPU_.reset();
-  quintupletsInCPU_.reset();
-  pixelTripletsInCPU_.reset();
-  pixelQuintupletsInCPU_.reset();
+  tripletsHC_.reset();
+  quintupletsHC_.reset();
+  pixelTripletsHC_.reset();
+  pixelQuintupletsHC_.reset();
   trackCandidatesHC_.reset();
   modulesInCPU_.reset();
 }
 
@@ -419,7 +423,7 @@ void Event::createSegmentsWithModuleMap() {
 }
 
 void Event::createTriplets() {
-  if (!tripletsInGPU_) {
+  if (!tripletsDC_) {
     WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1});
 
     alpaka::exec(queue_,
alpaka::memcpy(queue_, maxTriplets_buf_h, rangesBuffers_->device_nTotalTrips_buf); alpaka::wait(queue_); // wait to get the value before using it - tripletsInGPU_.emplace(); - tripletsBuffers_.emplace(*maxTriplets_buf_h.data(), nLowerModules_, devAcc_, queue_); - tripletsInGPU_->setData(*tripletsBuffers_); - - alpaka::memcpy(queue_, tripletsBuffers_->nMemoryLocations_buf, maxTriplets_buf_h); + std::array const triplets_sizes{ + {static_cast(*maxTriplets_buf_h.data()), static_cast(nLowerModules_)}}; + tripletsDC_.emplace(triplets_sizes, queue_); + + auto tripletsOccupancy = tripletsDC_->view(); + auto nTriplets_view = + alpaka::createView(devAcc_, tripletsOccupancy.nTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, nTriplets_view, 0u); + auto totOccupancyTriplets_view = + alpaka::createView(devAcc_, tripletsOccupancy.totOccupancyTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyTriplets_view, 0u); + auto triplets = tripletsDC_->view(); + auto partOfPT5_view = alpaka::createView(devAcc_, triplets.partOfPT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); + auto partOfT5_view = alpaka::createView(devAcc_, triplets.partOfT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfT5_view, 0u); + auto partOfPT3_view = alpaka::createView(devAcc_, triplets.partOfPT3(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT3_view, 0u); } uint16_t nonZeroModules = 0; @@ -481,17 +498,18 @@ void Event::createTriplets() { Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; - WorkDiv3D const createTripletsInGPUv2_workDiv = + WorkDiv3D const createTriplets_workDiv = createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); alpaka::exec(queue_, - createTripletsInGPUv2_workDiv, - CreateTripletsInGPUv2{}, + createTriplets_workDiv, + CreateTriplets{}, *modulesBuffers_.data(), miniDoubletsDC_->const_view(), segmentsDC_->const_view(), segmentsDC_->const_view(), - *tripletsInGPU_, + tripletsDC_->view(), + tripletsDC_->view(), *rangesInGPU_, index_gpu_buf.data(), nonZeroModules); @@ -502,7 +520,7 @@ void Event::createTriplets() { addTripletRangesToEventExplicit_workDiv, AddTripletRangesToEventExplicit{}, *modulesBuffers_.data(), - *tripletsInGPU_, + tripletsDC_->const_view(), *rangesInGPU_); if (addObjects_) { @@ -527,17 +545,17 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { CrossCleanpT3{}, *modulesBuffers_.data(), *rangesInGPU_, - *pixelTripletsInGPU_, + pixelTripletsDC_->view(), segmentsDC_->const_view(), - *pixelQuintupletsInGPU_); + pixelQuintupletsDC_->const_view()); - WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); + WorkDiv1D const addpT3asTrackCandidates_workDiv = createWorkDiv({1}, {512}, {1}); alpaka::exec(queue_, - addpT3asTrackCandidatesInGPU_workDiv, - AddpT3asTrackCandidatesInGPU{}, + addpT3asTrackCandidates_workDiv, + AddpT3asTrackCandidates{}, nLowerModules_, - *pixelTripletsInGPU_, + pixelTripletsDC_->const_view(), trackCandidatesDC_->view(), segmentsDC_->const_view(), *rangesInGPU_); @@ -550,13 +568,14 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; - WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = + WorkDiv3D const 
removeDupQuintupletsBeforeTC_workDiv = createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); alpaka::exec(queue_, - removeDupQuintupletsInGPUBeforeTC_workDiv, - RemoveDupQuintupletsInGPUBeforeTC{}, - *quintupletsInGPU_, + removeDupQuintupletsBeforeTC_workDiv, + RemoveDupQuintupletsBeforeTC{}, + quintupletsDC_->view(), + quintupletsDC_->view(), *rangesInGPU_); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; @@ -568,21 +587,23 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { crossCleanT5_workDiv, CrossCleanT5{}, *modulesBuffers_.data(), - *quintupletsInGPU_, - *pixelQuintupletsInGPU_, - *pixelTripletsInGPU_, + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->const_view(), + pixelTripletsDC_->const_view(), *rangesInGPU_); - Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; - Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; - WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( - blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); + Vec3D const threadsPerBlock_addT5asTrackCandidate{1, 8, 128}; + Vec3D const blocksPerGrid_addT5asTrackCandidate{1, 8, 10}; + WorkDiv3D const addT5asTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addT5asTrackCandidate, threadsPerBlock_addT5asTrackCandidate, elementsPerThread); alpaka::exec(queue_, - addT5asTrackCandidateInGPU_workDiv, - AddT5asTrackCandidateInGPU{}, + addT5asTrackCandidate_workDiv, + AddT5asTrackCandidate{}, nLowerModules_, - *quintupletsInGPU_, + quintupletsDC_->const_view(), + quintupletsDC_->const_view(), trackCandidatesDC_->view(), *rangesInGPU_); @@ -611,14 +632,14 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { CrossCleanpLS{}, *modulesBuffers_.data(), *rangesInGPU_, - *pixelTripletsInGPU_, + pixelTripletsDC_->const_view(), trackCandidatesDC_->view(), segmentsDC_->const_view(), segmentsDC_->const_view(), segmentsDC_->view(), miniDoubletsDC_->const_view(), *hitsInGPU_, - *quintupletsInGPU_); + quintupletsDC_->const_view()); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; @@ -665,10 +686,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { } void Event::createPixelTriplets() { - if (!pixelTripletsInGPU_) { - pixelTripletsInGPU_.emplace(); - pixelTripletsBuffers_.emplace(n_max_pixel_triplets, devAcc_, queue_); - pixelTripletsInGPU_->setData(*pixelTripletsBuffers_); + if (!pixelTripletsDC_) { + pixelTripletsDC_.emplace(n_max_pixel_triplets, queue_); + auto nPixelTriplets_view = alpaka::createView(devAcc_, &(*pixelTripletsDC_)->nPixelTriplets(), 1u); + alpaka::memset(queue_, nPixelTriplets_view, 0u); + auto totOccupancyPixelTriplets_view = + alpaka::createView(devAcc_, &(*pixelTripletsDC_)->totOccupancyPixelTriplets(), 1u); + alpaka::memset(queue_, totOccupancyPixelTriplets_view, 0u); } SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); SegmentsPixelConst segmentsPixel = segmentsDC_->view(); @@ -750,19 +774,19 @@ void Event::createPixelTriplets() { Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; - WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = - createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + WorkDiv3D const createPixelTripletsFromMap_workDiv = 
createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); alpaka::exec(queue_, - createPixelTripletsInGPUFromMapv2_workDiv, - CreatePixelTripletsInGPUFromMapv2{}, + createPixelTripletsFromMap_workDiv, + CreatePixelTripletsFromMap{}, *modulesBuffers_.data(), *rangesInGPU_, miniDoubletsDC_->const_view(), segmentsDC_->const_view(), segmentsDC_->const_view(), - *tripletsInGPU_, - *pixelTripletsInGPU_, + tripletsDC_->view(), + tripletsDC_->const_view(), + pixelTripletsDC_->view(), connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), nInnerSegments); @@ -770,7 +794,7 @@ void Event::createPixelTriplets() { #ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::memcpy(queue_, nPixelTriplets_buf, alpaka::createView(devAcc_, &(*pixelTripletsDC_)->nPixelTriplets(), 1u)); alpaka::wait(queue_); // wait to get the value before using it std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; @@ -780,21 +804,21 @@ void Event::createPixelTriplets() { Vec3D const threadsPerBlockDupPixTrip{1, 16, 16}; //seems like more blocks lead to conflicting writes Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; - WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = + WorkDiv3D const removeDupPixelTripletsFromMap_workDiv = createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); alpaka::exec( - queue_, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU_); + queue_, removeDupPixelTripletsFromMap_workDiv, RemoveDupPixelTripletsFromMap{}, pixelTripletsDC_->view()); } void Event::createQuintuplets() { - WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createEligibleModulesListForQuintuplets_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue_, - createEligibleModulesListForQuintupletsGPU_workDiv, - CreateEligibleModulesListForQuintupletsGPU{}, + createEligibleModulesListForQuintuplets_workDiv, + CreateEligibleModulesListForQuintuplets{}, *modulesBuffers_.data(), - *tripletsInGPU_, + tripletsDC_->const_view(), *rangesInGPU_); auto nEligibleT5Modules_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); @@ -807,40 +831,54 @@ void Event::createQuintuplets() { auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); - if (!quintupletsInGPU_) { - quintupletsInGPU_.emplace(); - quintupletsBuffers_.emplace(nTotalQuintuplets, nLowerModules_, devAcc_, queue_); - quintupletsInGPU_->setData(*quintupletsBuffers_); - - alpaka::memcpy(queue_, quintupletsBuffers_->nMemoryLocations_buf, nTotalQuintuplets_buf); + if (!quintupletsDC_) { + std::array const quintuplets_sizes{{static_cast(nTotalQuintuplets), static_cast(nLowerModules_)}}; + quintupletsDC_.emplace(quintuplets_sizes, queue_); + auto quintupletsOccupancy = quintupletsDC_->view(); + auto nQuintuplets_view = + alpaka::createView(devAcc_, quintupletsOccupancy.nQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, nQuintuplets_view, 0u); + auto totOccupancyQuintuplets_view = alpaka::createView( + devAcc_, quintupletsOccupancy.totOccupancyQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyQuintuplets_view, 0u); + auto quintuplets = quintupletsDC_->view(); + auto isDup_view = 
alpaka::createView(devAcc_, quintuplets.isDup(), quintuplets.metadata().size()); + alpaka::memset(queue_, isDup_view, 0u); + auto tightCutFlag_view = alpaka::createView(devAcc_, quintuplets.tightCutFlag(), quintuplets.metadata().size()); + alpaka::memset(queue_, tightCutFlag_view, 0u); + auto partOfPT5_view = alpaka::createView(devAcc_, quintuplets.partOfPT5(), quintuplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); } Vec3D const threadsPerBlockQuints{1, 8, 32}; Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; - WorkDiv3D const createQuintupletsInGPUv2_workDiv = + WorkDiv3D const createQuintuplets_workDiv = createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); alpaka::exec(queue_, - createQuintupletsInGPUv2_workDiv, - CreateQuintupletsInGPUv2{}, + createQuintuplets_workDiv, + CreateQuintuplets{}, *modulesBuffers_.data(), miniDoubletsDC_->const_view(), segmentsDC_->const_view(), - *tripletsInGPU_, - *quintupletsInGPU_, + tripletsDC_->view(), + tripletsDC_->const_view(), + quintupletsDC_->view(), + quintupletsDC_->view(), *rangesInGPU_, nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; - WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = + WorkDiv3D const removeDupQuintupletsAfterBuild_workDiv = createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); alpaka::exec(queue_, - removeDupQuintupletsInGPUAfterBuild_workDiv, - RemoveDupQuintupletsInGPUAfterBuild{}, + removeDupQuintupletsAfterBuild_workDiv, + RemoveDupQuintupletsAfterBuild{}, *modulesBuffers_.data(), - *quintupletsInGPU_, + quintupletsDC_->view(), + quintupletsDC_->const_view(), *rangesInGPU_); WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); @@ -849,7 +887,7 @@ void Event::createQuintuplets() { addQuintupletRangesToEventExplicit_workDiv, AddQuintupletRangesToEventExplicit{}, *modulesBuffers_.data(), - *quintupletsInGPU_, + quintupletsDC_->const_view(), *rangesInGPU_); if (addObjects_) { @@ -875,10 +913,13 @@ void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { } void Event::createPixelQuintuplets() { - if (!pixelQuintupletsInGPU_) { - pixelQuintupletsInGPU_.emplace(); - pixelQuintupletsBuffers_.emplace(n_max_pixel_quintuplets, devAcc_, queue_); - pixelQuintupletsInGPU_->setData(*pixelQuintupletsBuffers_); + if (!pixelQuintupletsDC_) { + pixelQuintupletsDC_.emplace(n_max_pixel_quintuplets, queue_); + auto nPixelQuintuplets_view = alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->nPixelQuintuplets(), 1u); + alpaka::memset(queue_, nPixelQuintuplets_view, 0u); + auto totOccupancyPixelQuintuplets_view = + alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->totOccupancyPixelQuintuplets(), 1u); + alpaka::memset(queue_, totOccupancyPixelQuintuplets_view, 0u); } if (!trackCandidatesDC_) { trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); @@ -964,19 +1005,20 @@ void Event::createPixelQuintuplets() { Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; - WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = + WorkDiv3D const createPixelQuintupletsFromMap_workDiv = createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); alpaka::exec(queue_, - createPixelQuintupletsInGPUFromMapv2_workDiv, - CreatePixelQuintupletsInGPUFromMapv2{}, + 
createPixelQuintupletsFromMap_workDiv, + CreatePixelQuintupletsFromMap{}, *modulesBuffers_.data(), miniDoubletsDC_->const_view(), segmentsDC_->const_view(), segmentsDC_->view(), - *tripletsInGPU_, - *quintupletsInGPU_, - *pixelQuintupletsInGPU_, + tripletsDC_->view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->view(), connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), nInnerSegments, @@ -984,21 +1026,21 @@ void Event::createPixelQuintuplets() { Vec3D const threadsPerBlockDupPix{1, 16, 16}; Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; - WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = + WorkDiv3D const removeDupPixelQuintupletsFromMap_workDiv = createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); alpaka::exec(queue_, - removeDupPixelQuintupletsInGPUFromMap_workDiv, - RemoveDupPixelQuintupletsInGPUFromMap{}, - *pixelQuintupletsInGPU_); + removeDupPixelQuintupletsFromMap_workDiv, + RemoveDupPixelQuintupletsFromMap{}, + pixelQuintupletsDC_->view()); - WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); + WorkDiv1D const addpT5asTrackCandidate_workDiv = createWorkDiv({1}, {256}, {1}); alpaka::exec(queue_, - addpT5asTrackCandidateInGPU_workDiv, - AddpT5asTrackCandidateInGPU{}, + addpT5asTrackCandidate_workDiv, + AddpT5asTrackCandidate{}, nLowerModules_, - *pixelQuintupletsInGPU_, + pixelQuintupletsDC_->const_view(), trackCandidatesDC_->view(), segmentsDC_->const_view(), *rangesInGPU_); @@ -1006,7 +1048,8 @@ void Event::createPixelQuintuplets() { #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue_, nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::memcpy( + queue_, nPixelQuintuplets_buf, alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->nPixelQuintuplets(), 1u)); alpaka::wait(queue_); // wait to get the value before using it std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl; @@ -1078,8 +1121,10 @@ void Event::addSegmentsToEventExplicit() { } void Event::addQuintupletsToEventExplicit() { + auto quintupletsOccupancy = quintupletsDC_->const_view(); + auto nQuintuplets_view = alpaka::createView(devAcc_, quintupletsOccupancy.nQuintuplets(), nLowerModules_); auto nQuintupletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, nQuintupletsCPU_buf, quintupletsBuffers_->nQuintuplets_buf); + alpaka::memcpy(queue_, nQuintupletsCPU_buf, nQuintuplets_view); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nModules_, queue_); @@ -1110,8 +1155,10 @@ void Event::addQuintupletsToEventExplicit() { } void Event::addTripletsToEventExplicit() { + auto tripletsOccupancy = tripletsDC_->const_view(); + auto nTriplets_view = alpaka::createView(devAcc_, tripletsOccupancy.nTriplets(), nLowerModules_); auto nTripletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, nTripletsCPU_buf, tripletsBuffers_->nTriplets_buf); + alpaka::memcpy(queue_, nTripletsCPU_buf, nTriplets_view); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); @@ -1236,7 +1283,7 @@ unsigned int Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { retur int Event::getNumberOfPixelTriplets() { auto nPixelTriplets_buf_h = 
cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::memcpy(queue_, nPixelTriplets_buf_h, alpaka::createView(devAcc_, &(*pixelTripletsDC_)->nPixelTriplets(), 1u)); alpaka::wait(queue_); return *nPixelTriplets_buf_h.data(); @@ -1245,7 +1292,8 @@ int Event::getNumberOfPixelTriplets() { int Event::getNumberOfPixelQuintuplets() { auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::memcpy( + queue_, nPixelQuintuplets_buf_h, alpaka::createView(devAcc_, &(*pixelQuintupletsDC_)->nPixelQuintuplets(), 1u)); alpaka::wait(queue_); return *nPixelQuintuplets_buf_h.data(); @@ -1441,177 +1489,89 @@ template SegmentsConst Event::getSegments(bool); template SegmentsOccupancyConst Event::getSegments(bool); template SegmentsPixelConst Event::getSegments(bool); -TripletsBuffer& Event::getTriplets(bool sync) { - if (!tripletsInCPU_) { - // Get nMemoryLocations parameter to initialize host based tripletsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, tripletsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before using +template +typename TSoA::ConstView Event::getTriplets(bool sync) { + if constexpr (std::is_same_v) { + return tripletsDC_->const_view(); + } else { + if (!tripletsHC_) { + tripletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *tripletsDC_)); - auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); - tripletsInCPU_->setData(*tripletsInCPU_); - - alpaka::memcpy(queue_, tripletsInCPU_->nMemoryLocations_buf, tripletsBuffers_->nMemoryLocations_buf); -#ifdef CUT_VALUE_DEBUG - alpaka::memcpy(queue_, tripletsInCPU_->zOut_buf, tripletsBuffers_->zOut_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zLo_buf, tripletsBuffers_->zLo_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zHi_buf, tripletsBuffers_->zHi_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zLoPointed_buf, tripletsBuffers_->zLoPointed_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->zHiPointed_buf, tripletsBuffers_->zHiPointed_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->dPhiCut_buf, tripletsBuffers_->dPhiCut_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->betaInCut_buf, tripletsBuffers_->betaInCut_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->rtLo_buf, tripletsBuffers_->rtLo_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->rtHi_buf, tripletsBuffers_->rtHi_buf, nMemHost); -#endif - alpaka::memcpy( - queue_, tripletsInCPU_->hitIndices_buf, tripletsBuffers_->hitIndices_buf, Params_T3::kHits * nMemHost); - alpaka::memcpy( - queue_, tripletsInCPU_->logicalLayers_buf, tripletsBuffers_->logicalLayers_buf, Params_T3::kLayers * nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->segmentIndices_buf, tripletsBuffers_->segmentIndices_buf, 2 * nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->betaIn_buf, tripletsBuffers_->betaIn_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->circleRadius_buf, tripletsBuffers_->circleRadius_buf, nMemHost); - alpaka::memcpy(queue_, tripletsInCPU_->nTriplets_buf, tripletsBuffers_->nTriplets_buf); - alpaka::memcpy(queue_, tripletsInCPU_->totOccupancyTriplets_buf, tripletsBuffers_->totOccupancyTriplets_buf); - if 
(sync) - alpaka::wait(queue_); // host consumers expect filled data + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return tripletsInCPU_.value(); + return tripletsHC_->const_view(); } +template TripletsConst Event::getTriplets(bool); +template TripletsOccupancyConst Event::getTriplets(bool); -QuintupletsBuffer& Event::getQuintuplets(bool sync) { - if (!quintupletsInCPU_) { - // Get nMemoryLocations parameter to initialize host based quintupletsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, quintupletsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); - quintupletsInCPU_->setData(*quintupletsInCPU_); +template +typename TSoA::ConstView Event::getQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return quintupletsDC_->const_view(); + } else { + if (!quintupletsHC_) { + quintupletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *quintupletsDC_)); - alpaka::memcpy(queue_, quintupletsInCPU_->nMemoryLocations_buf, quintupletsBuffers_->nMemoryLocations_buf); - alpaka::memcpy(queue_, quintupletsInCPU_->nQuintuplets_buf, quintupletsBuffers_->nQuintuplets_buf); - alpaka::memcpy( - queue_, quintupletsInCPU_->totOccupancyQuintuplets_buf, quintupletsBuffers_->totOccupancyQuintuplets_buf); - alpaka::memcpy( - queue_, quintupletsInCPU_->tripletIndices_buf, quintupletsBuffers_->tripletIndices_buf, 2 * nMemHost); - alpaka::memcpy(queue_, - quintupletsInCPU_->lowerModuleIndices_buf, - quintupletsBuffers_->lowerModuleIndices_buf, - Params_T5::kLayers * nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->innerRadius_buf, quintupletsBuffers_->innerRadius_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->bridgeRadius_buf, quintupletsBuffers_->bridgeRadius_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->outerRadius_buf, quintupletsBuffers_->outerRadius_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->isDup_buf, quintupletsBuffers_->isDup_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->score_rphisum_buf, quintupletsBuffers_->score_rphisum_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->eta_buf, quintupletsBuffers_->eta_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->phi_buf, quintupletsBuffers_->phi_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->chiSquared_buf, quintupletsBuffers_->chiSquared_buf, nMemHost); - alpaka::memcpy(queue_, quintupletsInCPU_->rzChiSquared_buf, quintupletsBuffers_->rzChiSquared_buf, nMemHost); - alpaka::memcpy( - queue_, quintupletsInCPU_->nonAnchorChiSquared_buf, quintupletsBuffers_->nonAnchorChiSquared_buf, nMemHost); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return quintupletsInCPU_.value(); + return quintupletsHC_->const_view(); } +template QuintupletsConst Event::getQuintuplets(bool); +template QuintupletsOccupancyConst Event::getQuintuplets(bool); -PixelTripletsBuffer& Event::getPixelTriplets(bool sync) { - if (!pixelTripletsInCPU_) { - // Get nPixelTriplets parameter to initialize host based quintupletsInCPU_ - auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf); - 
alpaka::wait(queue_); // wait for the value before using - - auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU_.emplace(nPixelTriplets, cms::alpakatools::host(), queue_); - pixelTripletsInCPU_->setData(*pixelTripletsInCPU_); +template +PixelTripletsConst Event::getPixelTriplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelTripletsDC_->const_view(); + } else { + if (!pixelTripletsHC_) { + pixelTripletsHC_.emplace(cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelTripletsDC_)); - alpaka::memcpy(queue_, pixelTripletsInCPU_->nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); - alpaka::memcpy(queue_, - pixelTripletsInCPU_->totOccupancyPixelTriplets_buf, - pixelTripletsBuffers_->totOccupancyPixelTriplets_buf); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->rzChiSquared_buf, pixelTripletsBuffers_->rzChiSquared_buf, nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->rPhiChiSquared_buf, pixelTripletsBuffers_->rPhiChiSquared_buf, nPixelTriplets); - alpaka::memcpy(queue_, - pixelTripletsInCPU_->rPhiChiSquaredInwards_buf, - pixelTripletsBuffers_->rPhiChiSquaredInwards_buf, - nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->tripletIndices_buf, pixelTripletsBuffers_->tripletIndices_buf, nPixelTriplets); - alpaka::memcpy(queue_, - pixelTripletsInCPU_->pixelSegmentIndices_buf, - pixelTripletsBuffers_->pixelSegmentIndices_buf, - nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->pixelRadius_buf, pixelTripletsBuffers_->pixelRadius_buf, nPixelTriplets); - alpaka::memcpy( - queue_, pixelTripletsInCPU_->tripletRadius_buf, pixelTripletsBuffers_->tripletRadius_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->isDup_buf, pixelTripletsBuffers_->isDup_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->eta_buf, pixelTripletsBuffers_->eta_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->phi_buf, pixelTripletsBuffers_->phi_buf, nPixelTriplets); - alpaka::memcpy(queue_, pixelTripletsInCPU_->score_buf, pixelTripletsBuffers_->score_buf, nPixelTriplets); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return pixelTripletsInCPU_.value(); + return pixelTripletsHC_->const_view(); } +template PixelTripletsConst Event::getPixelTriplets<>(bool); -PixelQuintupletsBuffer& Event::getPixelQuintuplets(bool sync) { - if (!pixelQuintupletsInCPU_) { - // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU_ - auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU_.emplace(nPixelQuintuplets, cms::alpakatools::host(), queue_); - pixelQuintupletsInCPU_->setData(*pixelQuintupletsInCPU_); +template +PixelQuintupletsConst Event::getPixelQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelQuintupletsDC_->const_view(); + } else { + if (!pixelQuintupletsHC_) { + pixelQuintupletsHC_.emplace( + cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelQuintupletsDC_)); - alpaka::memcpy( - queue_, pixelQuintupletsInCPU_->nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); - alpaka::memcpy(queue_, - 
pixelQuintupletsInCPU_->totOccupancyPixelQuintuplets_buf, - pixelQuintupletsBuffers_->totOccupancyPixelQuintuplets_buf); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->rzChiSquared_buf, - pixelQuintupletsBuffers_->rzChiSquared_buf, - nPixelQuintuplets); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->rPhiChiSquared_buf, - pixelQuintupletsBuffers_->rPhiChiSquared_buf, - nPixelQuintuplets); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->rPhiChiSquaredInwards_buf, - pixelQuintupletsBuffers_->rPhiChiSquaredInwards_buf, - nPixelQuintuplets); - alpaka::memcpy(queue_, - pixelQuintupletsInCPU_->pixelIndices_buf, - pixelQuintupletsBuffers_->pixelIndices_buf, - nPixelQuintuplets); - alpaka::memcpy( - queue_, pixelQuintupletsInCPU_->T5Indices_buf, pixelQuintupletsBuffers_->T5Indices_buf, nPixelQuintuplets); - alpaka::memcpy(queue_, pixelQuintupletsInCPU_->isDup_buf, pixelQuintupletsBuffers_->isDup_buf, nPixelQuintuplets); - alpaka::memcpy(queue_, pixelQuintupletsInCPU_->score_buf, pixelQuintupletsBuffers_->score_buf, nPixelQuintuplets); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } } - return pixelQuintupletsInCPU_.value(); + return pixelQuintupletsHC_->const_view(); } +template PixelQuintupletsConst Event::getPixelQuintuplets<>(bool); -const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { +const TrackCandidatesConst& Event::getTrackCandidatesWithSelection(bool inCMSSW, bool sync) { if (!trackCandidatesHC_) { // Get nTrackCanHost parameter to initialize host based instance auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); alpaka::memcpy( queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); - trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); + trackCandidatesHC_.emplace(nTrackCanHost, queue_); (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; alpaka::memcpy( @@ -1622,50 +1582,22 @@ const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) { alpaka::memcpy(queue_, alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), - (*trackCandidatesHC_)->logicalLayers()->data(), - Params_pT5::kLayers * nTrackCanHost), - alpaka::createView( - devAcc_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); - alpaka::memcpy( - queue_, - alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), - alpaka::createView(devAcc_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); - alpaka::memcpy( - queue_, - alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), - alpaka::createView(devAcc_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); - alpaka::memcpy( - queue_, - alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), - alpaka::createView(devAcc_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); - if (sync) - alpaka::wait(queue_); // host 
consumers expect filled data - } - return trackCandidatesHC_.value(); -} - -const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) { - if (!trackCandidatesHC_) { - // Get nTrackCanHost parameter to initialize host based instance - auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy( - queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, &(*trackCandidatesDC_)->nTrackCandidates(), 1u)); - trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); - alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU becomes usable - - auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - - (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; - alpaka::memcpy( - queue_, - alpaka::createView( - cms::alpakatools::host(), (*trackCandidatesHC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost), - alpaka::createView(devAcc_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); - alpaka::memcpy(queue_, - alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), - alpaka::createView(devAcc_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); + if (not inCMSSW) { + alpaka::memcpy(queue_, + alpaka::createView(cms::alpakatools::host(), + (*trackCandidatesHC_)->logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + alpaka::createView( + devAcc_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::memcpy( + queue_, + alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); + alpaka::memcpy(queue_, + alpaka::createView( + cms::alpakatools::host(), (*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), + alpaka::createView(devAcc_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); + } alpaka::memcpy( queue_, alpaka::createView(cms::alpakatools::host(), (*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), @@ -1673,7 +1605,7 @@ const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesHC_.value(); + return trackCandidatesHC_.value().const_view(); } ModulesBuffer& Event::getModules(bool isFull, bool sync) { diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index a3c3a21f09e2c..97cab0bc608a7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -3,20 +3,26 @@ #include +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/QuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" #include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" +#include "RecoTracker/LSTCore/interface/TripletsHostCollection.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/alpaka/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h" +#include 
"RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h" #include "RecoTracker/LSTCore/interface/Module.h" #include "Hit.h" -#include "Segment.h" -#include "Triplet.h" #include "Kernels.h" -#include "Quintuplet.h" -#include "MiniDoublet.h" -#include "PixelQuintuplet.h" -#include "PixelTriplet.h" -#include "TrackCandidate.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" @@ -49,27 +55,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional> hitsBuffers_; std::optional miniDoubletsDC_; std::optional segmentsDC_; - std::optional tripletsInGPU_; - std::optional> tripletsBuffers_; - std::optional quintupletsInGPU_; - std::optional> quintupletsBuffers_; + std::optional tripletsDC_; + std::optional quintupletsDC_; std::optional trackCandidatesDC_; - std::optional pixelTripletsInGPU_; - std::optional> pixelTripletsBuffers_; - std::optional pixelQuintupletsInGPU_; - std::optional> pixelQuintupletsBuffers_; + std::optional pixelTripletsDC_; + std::optional pixelQuintupletsDC_; //CPU interface stuff std::optional> rangesInCPU_; std::optional> hitsInCPU_; std::optional miniDoubletsHC_; std::optional segmentsHC_; - std::optional> tripletsInCPU_; + std::optional tripletsHC_; std::optional trackCandidatesHC_; std::optional> modulesInCPU_; - std::optional> quintupletsInCPU_; - std::optional> pixelTripletsInCPU_; - std::optional> pixelQuintupletsInCPU_; + std::optional quintupletsHC_; + std::optional pixelTripletsHC_; + std::optional pixelQuintupletsHC_; void initSync(bool verbose); @@ -185,12 +187,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { typename TSoA::ConstView getMiniDoublets(bool sync = true); template typename TSoA::ConstView getSegments(bool sync = true); - TripletsBuffer& getTriplets(bool sync = true); - QuintupletsBuffer& getQuintuplets(bool sync = true); - PixelTripletsBuffer& getPixelTriplets(bool sync = true); - PixelQuintupletsBuffer& getPixelQuintuplets(bool sync = true); - const TrackCandidatesHostCollection& getTrackCandidates(bool sync = true); - const TrackCandidatesHostCollection& getTrackCandidatesInCMSSW(bool sync = true); + template + typename TSoA::ConstView getTriplets(bool sync = true); + template + typename TSoA::ConstView getQuintuplets(bool sync = true); + template + PixelTripletsConst getPixelTriplets(bool sync = true); + template + PixelQuintupletsConst getPixelQuintuplets(bool sync = true); + const TrackCandidatesConst& getTrackCandidatesWithSelection(bool inCMSSW, bool sync); + const TrackCandidatesConst& getTrackCandidates(bool sync = true) { + return getTrackCandidatesWithSelection(false, sync); + } + const TrackCandidatesConst& getTrackCandidatesInCMSSW(bool sync = true) { + return getTrackCandidatesWithSelection(true, sync); + } ModulesBuffer& getModules(bool isFull = false, bool sync = true); }; diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index b4fecca8f90cf..ae4391cc0558c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -2,32 +2,32 @@ #define RecoTracker_LSTCore_src_alpaka_Kernels_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" 
#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" #include "Hit.h" -#include "MiniDoublet.h" #include "ObjectRanges.h" -#include "Segment.h" -#include "Triplet.h" -#include "Quintuplet.h" -#include "PixelQuintuplet.h" -#include "PixelTriplet.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets& quintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets quintuplets, unsigned int quintupletIndex, bool secondpass = false) { - quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; + quintuplets.isDup()[quintupletIndex] |= 1 + secondpass; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets pixelTriplets, unsigned int pixelTripletIndex) { - pixelTripletsInGPU.isDup[pixelTripletIndex] = true; + pixelTriplets.isDup()[pixelTripletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets pixelQuintuplets, unsigned int pixelQuintupletIndex) { - pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; + pixelQuintuplets.isDup()[pixelQuintupletIndex] = true; } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(SegmentsPixel segmentsPixel, @@ -36,15 +36,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { segmentsPixel.isDup()[pixelSegmentArrayIndex] |= 1 + secondpass; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, - unsigned int jx, - Quintuplets const& quintupletsInGPU) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, QuintupletsConst quintuplets) { unsigned int hits1[Params_T5::kHits]; unsigned int hits2[Params_T5::kHits]; for (int i = 0; i < Params_T5::kHits; i++) { - hits1[i] = quintupletsInGPU.hitIndices[Params_T5::kHits * ix + i]; - hits2[i] = quintupletsInGPU.hitIndices[Params_T5::kHits * jx + i]; + hits1[i] = quintuplets.hitIndices()[ix][i]; + hits2[i] = quintuplets.hitIndices()[jx][i]; } int nMatched = 0; @@ -65,13 +63,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, unsigned int jx, - PixelQuintuplets const& pixelQuintupletsInGPU) { + PixelQuintupletsConst pixelQuintuplets) { unsigned int hits1[Params_pT5::kHits]; unsigned int hits2[Params_pT5::kHits]; for (int i = 0; i < Params_pT5::kHits; i++) { - hits1[i] = pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * ix + i]; - hits2[i] = pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * jx + i]; + hits1[i] = pixelQuintuplets.hitIndices()[ix][i]; + hits2[i] = pixelQuintuplets.hitIndices()[jx][i]; } int nMatched = 0; @@ -92,14 +90,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, unsigned int jx, - PixelTriplets const& pixelTripletsInGPU, + PixelTripletsConst pixelTriplets, int* matched) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; for (int i = 0; i < Params_pLS::kHits; i++) { - phits1[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * ix + i]; - phits2[i] = 
pixelTripletsInGPU.hitIndices[Params_pT3::kHits * jx + i]; + phits1[i] = pixelTriplets.hitIndices()[ix][i]; + phits2[i] = pixelTriplets.hitIndices()[jx][i]; } int npMatched = 0; @@ -120,8 +118,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { int hits2[Params_T3::kHits]; for (int i = 0; i < Params_T3::kHits; i++) { - hits1[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * ix + i + 4]; // Omitting the pLS hits - hits2[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * jx + i + 4]; // Omitting the pLS hits + hits1[i] = pixelTriplets.hitIndices()[ix][i + 4]; // Omitting the pLS hits + hits2[i] = pixelTriplets.hitIndices()[jx][i + 4]; // Omitting the pLS hits } int nMatched = 0; @@ -142,34 +140,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { matched[1] = nMatched; } - struct RemoveDupQuintupletsInGPUAfterBuild { + struct RemoveDupQuintupletsAfterBuild { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, - Quintuplets quintupletsInGPU, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (unsigned int lowmod = globalThreadIdx[0]; lowmod < *modulesInGPU.nLowerModules; lowmod += gridThreadExtent[0]) { - unsigned int nQuintuplets_lowmod = quintupletsInGPU.nQuintuplets[lowmod]; + unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod]; int quintupletModuleIndices_lowmod = rangesInGPU.quintupletModuleIndices[lowmod]; for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) { unsigned int ix = quintupletModuleIndices_lowmod + ix1; - float eta1 = __H2F(quintupletsInGPU.eta[ix]); - float phi1 = __H2F(quintupletsInGPU.phi[ix]); - float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]); + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = __H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) { unsigned int jx = quintupletModuleIndices_lowmod + jx1; - float eta2 = __H2F(quintupletsInGPU.eta[jx]); - float phi2 = __H2F(quintupletsInGPU.phi[jx]); + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); - float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); if (dEta > 0.1f) continue; @@ -177,13 +176,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > 0.1f) continue; - int nMatched = checkHitsT5(ix, jx, quintupletsInGPU); + int nMatched = checkHitsT5(ix, jx, quintuplets); const int minNHitsForDup_T5 = 7; if (nMatched >= minNHitsForDup_T5) { if (score_rphisum1 >= score_rphisum2) { - rmQuintupletFromMemory(quintupletsInGPU, ix); + rmQuintupletFromMemory(quintuplets, ix); } else { - rmQuintupletFromMemory(quintupletsInGPU, jx); + rmQuintupletFromMemory(quintuplets, jx); } } } @@ -192,16 +191,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct RemoveDupQuintupletsInGPUBeforeTC { + struct RemoveDupQuintupletsBeforeTC { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Quintuplets quintuplets, + 
QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < *(rangesInGPU.nEligibleT5Modules); lowmodIdx1 += gridThreadExtent[1]) { uint16_t lowmod1 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx1]; - unsigned int nQuintuplets_lowmod1 = quintupletsInGPU.nQuintuplets[lowmod1]; + unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1]; if (nQuintuplets_lowmod1 == 0) continue; @@ -210,7 +212,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < *(rangesInGPU.nEligibleT5Modules); lowmodIdx2 += gridThreadExtent[2]) { uint16_t lowmod2 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx2]; - unsigned int nQuintuplets_lowmod2 = quintupletsInGPU.nQuintuplets[lowmod2]; + unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2]; if (nQuintuplets_lowmod2 == 0) continue; @@ -218,7 +220,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) { unsigned int ix = quintupletModuleIndices_lowmod1 + ix1; - if (quintupletsInGPU.partOfPT5[ix] || (quintupletsInGPU.isDup[ix] & 1)) + if (quintuplets.partOfPT5()[ix] || (quintuplets.isDup()[ix] & 1)) continue; for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { @@ -226,16 +228,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (ix == jx) continue; - if (quintupletsInGPU.partOfPT5[jx] || (quintupletsInGPU.isDup[jx] & 1)) + if (quintuplets.partOfPT5()[jx] || (quintuplets.isDup()[jx] & 1)) continue; - float eta1 = __H2F(quintupletsInGPU.eta[ix]); - float phi1 = __H2F(quintupletsInGPU.phi[ix]); - float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]); + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = __H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); - float eta2 = __H2F(quintupletsInGPU.eta[jx]); - float phi2 = __H2F(quintupletsInGPU.phi[jx]); - float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); @@ -247,15 +249,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; float dR2 = dEta * dEta + dPhi * dPhi; - int nMatched = checkHitsT5(ix, jx, quintupletsInGPU); + int nMatched = checkHitsT5(ix, jx, quintuplets); const int minNHitsForDup_T5 = 5; if (dR2 < 0.001f || nMatched >= minNHitsForDup_T5) { if (score_rphisum1 > score_rphisum2) { - rmQuintupletFromMemory(quintupletsInGPU, ix, true); + rmQuintupletFromMemory(quintuplets, ix, true); } else if (score_rphisum1 < score_rphisum2) { - rmQuintupletFromMemory(quintupletsInGPU, jx, true); + rmQuintupletFromMemory(quintuplets, jx, true); } else { - rmQuintupletFromMemory(quintupletsInGPU, (ix < jx ? ix : jx), true); + rmQuintupletFromMemory(quintuplets, (ix < jx ? 
ix : jx), true); } } } @@ -265,35 +267,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct RemoveDupPixelTripletsInGPUFromMap { + struct RemoveDupPixelTripletsFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTripletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (unsigned int ix = globalThreadIdx[1]; ix < *pixelTripletsInGPU.nPixelTriplets; ix += gridThreadExtent[1]) { - for (unsigned int jx = globalThreadIdx[2]; jx < *pixelTripletsInGPU.nPixelTriplets; jx += gridThreadExtent[2]) { + for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) { + for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) { if (ix == jx) continue; int nMatched[2]; - checkHitspT3(ix, jx, pixelTripletsInGPU, nMatched); + checkHitspT3(ix, jx, pixelTriplets, nMatched); const int minNHitsForDup_pT3 = 5; if ((nMatched[0] + nMatched[1]) >= minNHitsForDup_pT3) { // Check the layers - if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] < - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2]) { - rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + if (pixelTriplets.logicalLayers()[jx][2] < pixelTriplets.logicalLayers()[ix][2]) { + rmPixelTripletFromMemory(pixelTriplets, ix); break; - } else if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2] == - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] && - __H2F(pixelTripletsInGPU.score[ix]) > __H2F(pixelTripletsInGPU.score[jx])) { - rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + __H2F(pixelTriplets.score()[ix]) > __H2F(pixelTriplets.score()[jx])) { + rmPixelTripletFromMemory(pixelTriplets, ix); break; - } else if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2] == - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] && - (__H2F(pixelTripletsInGPU.score[ix]) == __H2F(pixelTripletsInGPU.score[jx])) && (ix < jx)) { - rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + (__H2F(pixelTriplets.score()[ix]) == __H2F(pixelTriplets.score()[jx])) && (ix < jx)) { + rmPixelTripletFromMemory(pixelTriplets, ix); break; } } @@ -302,25 +301,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct RemoveDupPixelQuintupletsInGPUFromMap { + struct RemoveDupPixelQuintupletsFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintupletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) { - float score1 = __H2F(pixelQuintupletsInGPU.score[ix]); + float score1 = __H2F(pixelQuintuplets.score()[ix]); for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) { if (ix == jx) continue; - int nMatched = checkHitspT5(ix, jx, pixelQuintupletsInGPU); - 
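// The duplicate-removal kernels in this file all reduce to the same two ingredients:
// count the hits shared by two candidates, then drop the one with the larger (worse)
// score, breaking ties on the index. A minimal standalone sketch of that decision
// (hypothetical helpers, not part of the patch; plain arrays stand in for the SoA hit rows):
template <int N>
ALPAKA_FN_ACC int countSharedHits(unsigned int const (&hitsA)[N], unsigned int const (&hitsB)[N]) {
  int nMatched = 0;
  for (int i = 0; i < N; i++) {
    for (int j = 0; j < N; j++) {
      if (hitsA[i] == hitsB[j]) {
        nMatched++;
        break;  // count each hit of the first candidate at most once
      }
    }
  }
  return nMatched;
}

// true if the first candidate should be removed (mirrors the score1/score2 comparison above)
ALPAKA_FN_ACC inline bool removeFirstCandidate(float score1, float score2, unsigned int ix, unsigned int jx) {
  return score1 > score2 || (score1 == score2 && ix > jx);
}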
float score2 = __H2F(pixelQuintupletsInGPU.score[jx]); + int nMatched = checkHitspT5(ix, jx, pixelQuintuplets); + float score2 = __H2F(pixelQuintuplets.score()[jx]); const int minNHitsForDup_pT5 = 7; if (nMatched >= minNHitsForDup_pT5) { if (score1 > score2 or ((score1 == score2) and (ix > jx))) { - rmPixelQuintupletFromMemory(pixelQuintupletsInGPU, ix); + rmPixelQuintupletFromMemory(pixelQuintuplets, ix); break; } } diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 2c5cfd499c7e0..1f3f11e79cbb2 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -255,7 +255,7 @@ void LST::getOutput(Event& event) { std::vector tc_trackCandidateType; HitsBuffer& hitsBuffer = event.getHitsInCMSSW(false); // sync on next line - auto const& trackCandidates = event.getTrackCandidatesInCMSSW().const_view(); + auto const& trackCandidates = event.getTrackCandidatesInCMSSW(); unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index 6a125b96070cb..ff7e9d6656975 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -3,12 +3,12 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" #include "NeuralNetworkWeights.h" -#include "Segment.h" -#include "MiniDoublet.h" #include "Hit.h" -#include "Triplet.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { @@ -19,7 +19,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules const& modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, - Triplets const& tripletsInGPU, + TripletsConst triplets, const float* xVec, const float* yVec, const unsigned int* mdIndices, @@ -59,7 +59,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { layer2_adjustment = 1; // get upper segment to be in second layer } unsigned int md_idx_for_t5_eta_phi = - segments.mdIndices()[tripletsInGPU.segmentIndices[2 * innerTripletIndex]][layer2_adjustment]; + segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]][layer2_adjustment]; bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index d33022cd112b1..475c27e750825 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -2,117 +2,23 @@ #define RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" -#include "Segment.h" -#include "MiniDoublet.h" #include "Hit.h" -#include "Triplet.h" -#include 
"Quintuplet.h" -#include "PixelTriplet.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct PixelQuintuplets { - unsigned int* pixelIndices; - unsigned int* T5Indices; - unsigned int* nPixelQuintuplets; - unsigned int* totOccupancyPixelQuintuplets; - bool* isDup; - FPX* score; - FPX* eta; - FPX* phi; - uint8_t* logicalLayers; - unsigned int* hitIndices; - uint16_t* lowerModuleIndices; - FPX* pixelRadius; - FPX* quintupletRadius; - FPX* centerX; - FPX* centerY; - float* rzChiSquared; - float* rPhiChiSquared; - float* rPhiChiSquaredInwards; - - template - void setData(TBuff& buf) { - pixelIndices = buf.pixelIndices_buf.data(); - T5Indices = buf.T5Indices_buf.data(); - nPixelQuintuplets = buf.nPixelQuintuplets_buf.data(); - totOccupancyPixelQuintuplets = buf.totOccupancyPixelQuintuplets_buf.data(); - isDup = buf.isDup_buf.data(); - score = buf.score_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - pixelRadius = buf.pixelRadius_buf.data(); - quintupletRadius = buf.quintupletRadius_buf.data(); - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - rzChiSquared = buf.rzChiSquared_buf.data(); - rPhiChiSquared = buf.rPhiChiSquared_buf.data(); - rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); - } - }; - - template - struct PixelQuintupletsBuffer { - Buf pixelIndices_buf; - Buf T5Indices_buf; - Buf nPixelQuintuplets_buf; - Buf totOccupancyPixelQuintuplets_buf; - Buf isDup_buf; - Buf score_buf; - Buf eta_buf; - Buf phi_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf lowerModuleIndices_buf; - Buf pixelRadius_buf; - Buf quintupletRadius_buf; - Buf centerX_buf; - Buf centerY_buf; - Buf rzChiSquared_buf; - Buf rPhiChiSquared_buf; - Buf rPhiChiSquaredInwards_buf; - - PixelQuintuplets data_; - - template - PixelQuintupletsBuffer(unsigned int maxPixelQuintuplets, TDevAcc const& devAccIn, TQueue& queue) - : pixelIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - T5Indices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - nPixelQuintuplets_buf(allocBufWrapper(devAccIn, 1, queue)), - totOccupancyPixelQuintuplets_buf(allocBufWrapper(devAccIn, 1, queue)), - isDup_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - score_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - eta_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - phi_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kLayers, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kHits, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kLayers, queue)), - pixelRadius_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - quintupletRadius_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - rzChiSquared_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - rPhiChiSquared_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), - rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)) { - alpaka::memset(queue, nPixelQuintuplets_buf, 0u); - alpaka::memset(queue, totOccupancyPixelQuintuplets_buf, 0u); - } - - inline 
PixelQuintuplets const* data() const { return &data_; } - inline void setData(PixelQuintupletsBuffer& buf) { data_.setData(buf); } - }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(Modules const& modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, - Quintuplets const& quintupletsInGPU, - PixelQuintuplets& pixelQuintupletsInGPU, + QuintupletsConst quintuplets, + PixelQuintuplets pixelQuintuplets, unsigned int pixelIndex, - unsigned int T5Index, + unsigned int t5Index, unsigned int pixelQuintupletIndex, float rzChiSquared, float rPhiChiSquared, @@ -124,81 +30,56 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float quintupletRadius, float centerX, float centerY) { - pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex] = pixelIndex; - pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex] = T5Index; - pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = false; - pixelQuintupletsInGPU.score[pixelQuintupletIndex] = __F2H(score); - pixelQuintupletsInGPU.eta[pixelQuintupletIndex] = __F2H(eta); - pixelQuintupletsInGPU.phi[pixelQuintupletIndex] = __F2H(phi); - - pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex] = __F2H(pixelRadius); - pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex] = __F2H(quintupletRadius); - pixelQuintupletsInGPU.centerX[pixelQuintupletIndex] = __F2H(centerX); - pixelQuintupletsInGPU.centerY[pixelQuintupletIndex] = __F2H(centerY); - - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex] = 0; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 1] = 0; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 2] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 3] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 1]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 4] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 2]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 5] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 3]; - pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 6] = - quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 4]; - - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex] = - segments.innerLowerModuleIndices()[pixelIndex]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 1] = - segments.outerLowerModuleIndices()[pixelIndex]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 2] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 3] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 1]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 4] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 2]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 5] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 3]; - pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 6] = - quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 4]; + pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex] = 
pixelIndex; + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex] = t5Index; + pixelQuintuplets.isDup()[pixelQuintupletIndex] = false; + pixelQuintuplets.score()[pixelQuintupletIndex] = __F2H(score); + pixelQuintuplets.eta()[pixelQuintupletIndex] = __F2H(eta); + pixelQuintuplets.phi()[pixelQuintupletIndex] = __F2H(phi); + + pixelQuintuplets.pixelRadius()[pixelQuintupletIndex] = __F2H(pixelRadius); + pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex] = __F2H(quintupletRadius); + pixelQuintuplets.centerX()[pixelQuintupletIndex] = __F2H(centerX); + pixelQuintuplets.centerY()[pixelQuintupletIndex] = __F2H(centerY); + + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][0] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][1] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][2] = quintuplets.logicalLayers()[t5Index][0]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][3] = quintuplets.logicalLayers()[t5Index][1]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][4] = quintuplets.logicalLayers()[t5Index][2]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][5] = quintuplets.logicalLayers()[t5Index][3]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][6] = quintuplets.logicalLayers()[t5Index][4]; + + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][0] = segments.innerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][1] = segments.outerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][2] = quintuplets.lowerModuleIndices()[t5Index][0]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][3] = quintuplets.lowerModuleIndices()[t5Index][1]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][4] = quintuplets.lowerModuleIndices()[t5Index][2]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][5] = quintuplets.lowerModuleIndices()[t5Index][3]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][6] = quintuplets.lowerModuleIndices()[t5Index][4]; unsigned int pixelInnerMD = segments.mdIndices()[pixelIndex][0]; unsigned int pixelOuterMD = segments.mdIndices()[pixelIndex][1]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex] = mds.anchorHitIndices()[pixelInnerMD]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 1] = - mds.outerHitIndices()[pixelInnerMD]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 2] = - mds.anchorHitIndices()[pixelOuterMD]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 3] = - mds.outerHitIndices()[pixelOuterMD]; - - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 4] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 5] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 1]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 6] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 2]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 7] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 3]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 8] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 4]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 9] = - quintupletsInGPU.hitIndices[Params_T5::kHits * 
T5Index + 5]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 10] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 6]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 11] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 7]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 12] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 8]; - pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 13] = - quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 9]; - - pixelQuintupletsInGPU.rzChiSquared[pixelQuintupletIndex] = rzChiSquared; - pixelQuintupletsInGPU.rPhiChiSquared[pixelQuintupletIndex] = rPhiChiSquared; - pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][4] = quintuplets.hitIndices()[t5Index][0]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][5] = quintuplets.hitIndices()[t5Index][1]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][6] = quintuplets.hitIndices()[t5Index][2]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][7] = quintuplets.hitIndices()[t5Index][3]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][8] = quintuplets.hitIndices()[t5Index][4]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][9] = quintuplets.hitIndices()[t5Index][5]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][10] = quintuplets.hitIndices()[t5Index][6]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][11] = quintuplets.hitIndices()[t5Index][7]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][12] = quintuplets.hitIndices()[t5Index][8]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][13] = quintuplets.hitIndices()[t5Index][9]; + + pixelQuintuplets.rzChiSquared()[pixelQuintupletIndex] = rzChiSquared; + pixelQuintuplets.rPhiChiSquared()[pixelQuintupletIndex] = rPhiChiSquared; + pixelQuintuplets.rPhiChiSquaredInwards()[pixelQuintupletIndex] = rPhiChiSquaredInwards; } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(Modules const& modulesInGPU, @@ -677,8 +558,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { MiniDoubletsConst mds, SegmentsConst segments, SegmentsPixelConst segmentsPixel, - Triplets const& tripletsInGPU, - Quintuplets const& quintupletsInGPU, + TripletsConst triplets, + QuintupletsConst quintuplets, unsigned int pixelSegmentIndex, unsigned int quintupletIndex, float& rzChiSquared, @@ -689,8 +570,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& centerX, float& centerY, unsigned int pixelSegmentArrayIndex) { - unsigned int T5InnerT3Index = quintupletsInGPU.tripletIndices[2 * quintupletIndex]; - unsigned int T5OuterT3Index = quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]; + unsigned int t5InnerT3Index = quintuplets.tripletIndices()[quintupletIndex][0]; + unsigned int t5OuterT3Index = quintuplets.tripletIndices()[quintupletIndex][1]; float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp, centerYTemp; @@ -701,9 +582,9 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst { mds, segments, segmentsPixel, - tripletsInGPU, + triplets, pixelSegmentIndex, - T5InnerT3Index, + t5InnerT3Index, pixelRadiusTemp, tripletRadius, centerXTemp, @@ -714,10 +595,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { false)) return false; - unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * T5InnerT3Index]; - unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * T5InnerT3Index + 1]; - unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index]; - unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index + 1]; + unsigned int firstSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1]; + unsigned int thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1]; unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; @@ -727,11 +608,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; - uint16_t lowerModuleIndex1 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex]; - uint16_t lowerModuleIndex2 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1]; - uint16_t lowerModuleIndex3 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 2]; - uint16_t lowerModuleIndex4 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 3]; - uint16_t lowerModuleIndex5 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 4]; + uint16_t lowerModuleIndex1 = quintuplets.lowerModuleIndices()[quintupletIndex][0]; + uint16_t lowerModuleIndex2 = quintuplets.lowerModuleIndices()[quintupletIndex][1]; + uint16_t lowerModuleIndex3 = quintuplets.lowerModuleIndices()[quintupletIndex][2]; + uint16_t lowerModuleIndex4 = quintuplets.lowerModuleIndices()[quintupletIndex][3]; + uint16_t lowerModuleIndex5 = quintuplets.lowerModuleIndices()[quintupletIndex][4]; uint16_t lowerModuleIndices[Params_T5::kLayers] = { lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; @@ -779,9 +660,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { centerY = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; pixelRadius = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; - float T5CenterX = quintupletsInGPU.regressionG[quintupletIndex]; - float T5CenterY = quintupletsInGPU.regressionF[quintupletIndex]; - quintupletRadius = quintupletsInGPU.regressionRadius[quintupletIndex]; + float T5CenterX = quintuplets.regressionG()[quintupletIndex]; + float T5CenterY = quintuplets.regressionF()[quintupletIndex]; + quintupletRadius = quintuplets.regressionRadius()[quintupletIndex]; rPhiChiSquared = computePT5RPhiChiSquared(acc, modulesInGPU, lowerModuleIndices, centerX, centerY, pixelRadius, xs, ys); @@ -801,7 +682,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float yPix[] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; rPhiChiSquaredInwards = computePT5RPhiChiSquaredInwards(T5CenterX, T5CenterY, quintupletRadius, xPix, yPix); - if (quintupletsInGPU.regressionRadius[quintupletIndex] < 5.0f * kR1GeVf) { + if 
(quintuplets.regressionRadius()[quintupletIndex] < 5.0f * kR1GeVf) { if (not passPT5RPhiChiSquaredInwardsCuts(modulesInGPU, lowerModuleIndex1, lowerModuleIndex2, @@ -818,16 +699,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; } - struct CreatePixelQuintupletsInGPUFromMapv2 { + struct CreatePixelQuintupletsFromMap { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, SegmentsPixel segmentsPixel, - Triplets tripletsInGPU, - Quintuplets quintupletsInGPU, - PixelQuintuplets pixelQuintupletsInGPU, + Triplets triplets, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintuplets pixelQuintuplets, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments, @@ -850,7 +732,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; if (segmentsPixel.isDup()[i_pLS]) continue; - unsigned int nOuterQuintuplets = quintupletsInGPU.nQuintuplets[quintupletLowerModuleIndex]; + unsigned int nOuterQuintuplets = quintupletsOccupancy.nQuintuplets()[quintupletLowerModuleIndex]; if (nOuterQuintuplets == 0) continue; @@ -864,7 +746,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[quintupletLowerModuleIndex] + outerQuintupletArrayIndex; - if (quintupletsInGPU.isDup[quintupletIndex]) + if (quintuplets.isDup()[quintupletIndex]) continue; float rzChiSquared, rPhiChiSquared, rPhiChiSquaredInwards, pixelRadius, quintupletRadius, centerX, centerY; @@ -875,8 +757,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds, segments, segmentsPixel, - tripletsInGPU, - quintupletsInGPU, + triplets, + quintuplets, pixelSegmentIndex, quintupletIndex, rzChiSquared, @@ -889,22 +771,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { static_cast(i_pLS)); if (success) { unsigned int totOccupancyPixelQuintuplets = alpaka::atomicAdd( - acc, pixelQuintupletsInGPU.totOccupancyPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); + acc, &pixelQuintuplets.totOccupancyPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); if (totOccupancyPixelQuintuplets >= n_max_pixel_quintuplets) { #ifdef WARNINGS printf("Pixel Quintuplet excess alert!\n"); #endif } else { unsigned int pixelQuintupletIndex = - alpaka::atomicAdd(acc, pixelQuintupletsInGPU.nPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); - float eta = __H2F(quintupletsInGPU.eta[quintupletIndex]); - float phi = __H2F(quintupletsInGPU.phi[quintupletIndex]); + alpaka::atomicAdd(acc, &pixelQuintuplets.nPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); + float eta = __H2F(quintuplets.eta()[quintupletIndex]); + float phi = __H2F(quintuplets.phi()[quintupletIndex]); addPixelQuintupletToMemory(modulesInGPU, mds, segments, - quintupletsInGPU, - pixelQuintupletsInGPU, + quintuplets, + pixelQuintuplets, pixelSegmentIndex, quintupletIndex, pixelQuintupletIndex, @@ -919,10 +801,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { centerX, centerY); - tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; - tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; segmentsPixel.partOfPT5()[i_pLS] = true; - quintupletsInGPU.partOfPT5[quintupletIndex] = true; + quintuplets.partOfPT5()[quintupletIndex] = 
true; } // tot occupancy } // end success } // end T5 diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 1401aefdf797d..8c096f5981865 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -3,136 +3,20 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" -#include "Triplet.h" -#include "Segment.h" -#include "MiniDoublet.h" #include "Hit.h" #include "ObjectRanges.h" -#include "Quintuplet.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - // One pixel segment, one outer tracker triplet! - struct PixelTriplets { - unsigned int* pixelSegmentIndices; - unsigned int* tripletIndices; - unsigned int* nPixelTriplets; - unsigned int* totOccupancyPixelTriplets; - - float* rPhiChiSquared; - float* rPhiChiSquaredInwards; - float* rzChiSquared; - - FPX* pixelRadius; - FPX* tripletRadius; - FPX* pt; - FPX* eta; - FPX* phi; - FPX* eta_pix; - FPX* phi_pix; - FPX* score; - bool* isDup; - bool* partOfPT5; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - uint16_t* lowerModuleIndices; - FPX* centerX; - FPX* centerY; - - template - void setData(TBuff& buf) { - pixelSegmentIndices = buf.pixelSegmentIndices_buf.data(); - tripletIndices = buf.tripletIndices_buf.data(); - nPixelTriplets = buf.nPixelTriplets_buf.data(); - totOccupancyPixelTriplets = buf.totOccupancyPixelTriplets_buf.data(); - pixelRadius = buf.pixelRadius_buf.data(); - tripletRadius = buf.tripletRadius_buf.data(); - pt = buf.pt_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - eta_pix = buf.eta_pix_buf.data(); - phi_pix = buf.phi_pix_buf.data(); - score = buf.score_buf.data(); - isDup = buf.isDup_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - centerX = buf.centerX_buf.data(); - centerY = buf.centerY_buf.data(); - rPhiChiSquared = buf.rPhiChiSquared_buf.data(); - rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); - rzChiSquared = buf.rzChiSquared_buf.data(); - } - }; - - template - struct PixelTripletsBuffer { - Buf pixelSegmentIndices_buf; - Buf tripletIndices_buf; - Buf nPixelTriplets_buf; - Buf totOccupancyPixelTriplets_buf; - Buf pixelRadius_buf; - Buf tripletRadius_buf; - Buf pt_buf; - Buf eta_buf; - Buf phi_buf; - Buf eta_pix_buf; - Buf phi_pix_buf; - Buf score_buf; - Buf isDup_buf; - Buf partOfPT5_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf lowerModuleIndices_buf; - Buf centerX_buf; - Buf centerY_buf; - Buf pixelRadiusError_buf; - Buf rPhiChiSquared_buf; - Buf rPhiChiSquaredInwards_buf; - Buf rzChiSquared_buf; - - PixelTriplets data_; - - template - PixelTripletsBuffer(unsigned int maxPixelTriplets, TDevAcc const& devAccIn, TQueue& queue) - : pixelSegmentIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - tripletIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - nPixelTriplets_buf(allocBufWrapper(devAccIn, 1, queue)), - totOccupancyPixelTriplets_buf(allocBufWrapper(devAccIn, 1, queue)), - pixelRadius_buf(allocBufWrapper(devAccIn, 
maxPixelTriplets, queue)), - tripletRadius_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - pt_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - eta_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - phi_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - eta_pix_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - phi_pix_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - score_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - isDup_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kLayers, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kHits, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kLayers, queue)), - centerX_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - centerY_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - pixelRadiusError_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - rPhiChiSquared_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), - rzChiSquared_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)) { - alpaka::memset(queue, nPixelTriplets_buf, 0u); - alpaka::memset(queue, totOccupancyPixelTriplets_buf, 0u); - alpaka::memset(queue, partOfPT5_buf, false); - } - - inline PixelTriplets const* data() const { return &data_; } - inline void setData(PixelTripletsBuffer& buf) { data_.setData(buf); } - }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoubletsConst mds, SegmentsConst segments, - Triplets const& tripletsInGPU, - PixelTriplets& pixelTripletsInGPU, + TripletsConst triplets, + PixelTriplets pixelTriplets, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float pixelRadius, @@ -149,63 +33,49 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float eta_pix, float phi_pix, float score) { - pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex] = pixelSegmentIndex; - pixelTripletsInGPU.tripletIndices[pixelTripletIndex] = tripletIndex; - pixelTripletsInGPU.pixelRadius[pixelTripletIndex] = __F2H(pixelRadius); - pixelTripletsInGPU.tripletRadius[pixelTripletIndex] = __F2H(tripletRadius); - pixelTripletsInGPU.pt[pixelTripletIndex] = __F2H(pt); - pixelTripletsInGPU.eta[pixelTripletIndex] = __F2H(eta); - pixelTripletsInGPU.phi[pixelTripletIndex] = __F2H(phi); - pixelTripletsInGPU.eta_pix[pixelTripletIndex] = __F2H(eta_pix); - pixelTripletsInGPU.phi_pix[pixelTripletIndex] = __F2H(phi_pix); - pixelTripletsInGPU.isDup[pixelTripletIndex] = false; - pixelTripletsInGPU.score[pixelTripletIndex] = __F2H(score); - - pixelTripletsInGPU.centerX[pixelTripletIndex] = __F2H(centerX); - pixelTripletsInGPU.centerY[pixelTripletIndex] = __F2H(centerY); - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex] = 0; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 1] = 0; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 2] = - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers]; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 3] = - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 1]; - pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 4] = - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2]; - - 
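// In addPixelTripletToMemory the old flat indexing
//   pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + k]
// becomes a per-object row access, pixelTriplets.hitIndices()[pixelTripletIndex][k].
// The unrolled copies of the six outer-tracker hits could therefore also be written as a
// loop (sketch with a hypothetical helper name; the offset of 4 skips the four pLS hits,
// exactly as in the patch):
ALPAKA_FN_ACC inline void copyTripletHitsToPixelTriplet(TripletsConst triplets,
                                                        PixelTriplets pixelTriplets,
                                                        unsigned int tripletIndex,
                                                        unsigned int pixelTripletIndex) {
  for (int k = 0; k < Params_T3::kHits; ++k)
    pixelTriplets.hitIndices()[pixelTripletIndex][4 + k] = triplets.hitIndices()[tripletIndex][k];
}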
pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex] = - segments.innerLowerModuleIndices()[pixelSegmentIndex]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 1] = - segments.outerLowerModuleIndices()[pixelSegmentIndex]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 2] = - tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 3] = - tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1]; - pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 4] = - tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2]; + pixelTriplets.pixelSegmentIndices()[pixelTripletIndex] = pixelSegmentIndex; + pixelTriplets.tripletIndices()[pixelTripletIndex] = tripletIndex; + pixelTriplets.pixelRadius()[pixelTripletIndex] = __F2H(pixelRadius); + pixelTriplets.tripletRadius()[pixelTripletIndex] = __F2H(tripletRadius); + pixelTriplets.pt()[pixelTripletIndex] = __F2H(pt); + pixelTriplets.eta()[pixelTripletIndex] = __F2H(eta); + pixelTriplets.phi()[pixelTripletIndex] = __F2H(phi); + pixelTriplets.eta_pix()[pixelTripletIndex] = __F2H(eta_pix); + pixelTriplets.phi_pix()[pixelTripletIndex] = __F2H(phi_pix); + pixelTriplets.isDup()[pixelTripletIndex] = false; + pixelTriplets.score()[pixelTripletIndex] = __F2H(score); + + pixelTriplets.centerX()[pixelTripletIndex] = __F2H(centerX); + pixelTriplets.centerY()[pixelTripletIndex] = __F2H(centerY); + pixelTriplets.logicalLayers()[pixelTripletIndex][0] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][1] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][2] = triplets.logicalLayers()[tripletIndex][0]; + pixelTriplets.logicalLayers()[pixelTripletIndex][3] = triplets.logicalLayers()[tripletIndex][1]; + pixelTriplets.logicalLayers()[pixelTripletIndex][4] = triplets.logicalLayers()[tripletIndex][2]; + + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][0] = segments.innerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][1] = segments.outerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][2] = triplets.lowerModuleIndices()[tripletIndex][0]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][3] = triplets.lowerModuleIndices()[tripletIndex][1]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][4] = triplets.lowerModuleIndices()[tripletIndex][2]; unsigned int pixelInnerMD = segments.mdIndices()[pixelSegmentIndex][0]; unsigned int pixelOuterMD = segments.mdIndices()[pixelSegmentIndex][1]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex] = mds.anchorHitIndices()[pixelInnerMD]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 1] = mds.outerHitIndices()[pixelInnerMD]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 2] = mds.anchorHitIndices()[pixelOuterMD]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 3] = mds.outerHitIndices()[pixelOuterMD]; - - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 4] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 5] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 1]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 6] = - 
tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 2]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 7] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 3]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 8] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 4]; - pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 9] = - tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 5]; - pixelTripletsInGPU.rPhiChiSquared[pixelTripletIndex] = rPhiChiSquared; - pixelTripletsInGPU.rPhiChiSquaredInwards[pixelTripletIndex] = rPhiChiSquaredInwards; - pixelTripletsInGPU.rzChiSquared[pixelTripletIndex] = rzChiSquared; + pixelTriplets.hitIndices()[pixelTripletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelTriplets.hitIndices()[pixelTripletIndex][4] = triplets.hitIndices()[tripletIndex][0]; + pixelTriplets.hitIndices()[pixelTripletIndex][5] = triplets.hitIndices()[tripletIndex][1]; + pixelTriplets.hitIndices()[pixelTripletIndex][6] = triplets.hitIndices()[tripletIndex][2]; + pixelTriplets.hitIndices()[pixelTripletIndex][7] = triplets.hitIndices()[tripletIndex][3]; + pixelTriplets.hitIndices()[pixelTripletIndex][8] = triplets.hitIndices()[tripletIndex][4]; + pixelTriplets.hitIndices()[pixelTripletIndex][9] = triplets.hitIndices()[tripletIndex][5]; + pixelTriplets.rPhiChiSquared()[pixelTripletIndex] = rPhiChiSquared; + pixelTriplets.rPhiChiSquaredInwards()[pixelTripletIndex] = rPhiChiSquaredInwards; + pixelTriplets.rzChiSquared()[pixelTripletIndex] = rzChiSquared; }; template @@ -771,7 +641,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { MiniDoubletsConst mds, SegmentsConst segments, SegmentsPixelConst segmentsPixel, - Triplets const& tripletsInGPU, + TripletsConst triplets, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float& pixelRadius, @@ -785,9 +655,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex]; - uint16_t lowerModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; - uint16_t middleModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1]; - uint16_t upperModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2]; + uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0]; + uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1]; + uint16_t upperModuleIndex = triplets.lowerModuleIndices()[tripletIndex][2]; { // pixel segment vs inner segment of the triplet @@ -801,7 +671,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { lowerModuleIndex, middleModuleIndex, pixelSegmentIndex, - tripletsInGPU.segmentIndices[Params_LS::kLayers * tripletIndex])) + triplets.segmentIndices()[tripletIndex][0])) return false; //pixel segment vs outer segment of triplet @@ -815,7 +685,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { middleModuleIndex, upperModuleIndex, pixelSegmentIndex, - tripletsInGPU.segmentIndices[Params_LS::kLayers * tripletIndex + 1])) + 
triplets.segmentIndices()[tripletIndex][1])) return false; } @@ -837,8 +707,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelRadius = pixelSegmentPt * kR1GeVf; float pixelRadiusError = pixelSegmentPtError * kR1GeVf; - unsigned int tripletInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex]; - unsigned int tripletOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex + 1]; + unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0]; + unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1]; unsigned int firstMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][0]; unsigned int secondMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][1]; @@ -850,9 +720,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]}; float g, f; - tripletRadius = tripletsInGPU.circleRadius[tripletIndex]; - g = tripletsInGPU.circleCenterX[tripletIndex]; - f = tripletsInGPU.circleCenterY[tripletIndex]; + tripletRadius = triplets.radius()[tripletIndex]; + g = triplets.centerX()[tripletIndex]; + f = triplets.centerY()[tripletIndex]; if (not passRadiusCriterion(acc, modulesInGPU, @@ -922,7 +792,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; }; - struct CreatePixelTripletsInGPUFromMapv2 { + struct CreatePixelTripletsFromMap { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, @@ -930,8 +800,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { MiniDoubletsConst mds, SegmentsConst segments, SegmentsPixelConst segmentsPixel, - Triplets tripletsInGPU, - PixelTriplets pixelTripletsInGPU, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + PixelTriplets pixelTriplets, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments) const { @@ -961,7 +832,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int nOuterTriplets = tripletsInGPU.nTriplets[tripletLowerModuleIndex]; + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex]; if (nOuterTriplets == 0) continue; @@ -988,10 +859,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { outerTripletArrayIndex += gridThreadExtent[2]) { unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[tripletLowerModuleIndex] + outerTripletArrayIndex; - if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]] == TwoS) + if (modulesInGPU.moduleType[triplets.lowerModuleIndices()[outerTripletIndex][1]] == TwoS) continue; //REMOVES PS-2S - if (tripletsInGPU.partOfPT5[outerTripletIndex]) + if (triplets.partOfPT5()[outerTripletIndex]) continue; //don't create pT3s for T3s accounted in pT5s float pixelRadius, tripletRadius, rPhiChiSquared, rzChiSquared, rPhiChiSquaredInwards, centerX, centerY; @@ -1001,7 +872,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds, segments, segmentsPixel, - tripletsInGPU, + triplets, pixelSegmentIndex, outerTripletIndex, pixelRadius, @@ -1013,27 +884,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rPhiChiSquaredInwards); if (success) { - float phi = mds.anchorPhi()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * outerTripletIndex]] - [layer2_adjustment]]; - float eta = mds.anchorEta()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * outerTripletIndex]] - [layer2_adjustment]]; + float phi = + mds.anchorPhi()[segments + 
.mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; + float eta = + mds.anchorEta()[segments + .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; float eta_pix = segmentsPixel.eta()[i_pLS]; float phi_pix = segmentsPixel.phi()[i_pLS]; float pt = segmentsPixel.ptIn()[i_pLS]; float score = rPhiChiSquared + rPhiChiSquaredInwards; - unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd( - acc, pixelTripletsInGPU.totOccupancyPixelTriplets, 1u, alpaka::hierarchy::Threads{}); + unsigned int totOccupancyPixelTriplets = + alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); if (totOccupancyPixelTriplets >= n_max_pixel_triplets) { #ifdef WARNINGS printf("Pixel Triplet excess alert!\n"); #endif } else { unsigned int pixelTripletIndex = - alpaka::atomicAdd(acc, pixelTripletsInGPU.nPixelTriplets, 1u, alpaka::hierarchy::Threads{}); + alpaka::atomicAdd(acc, &pixelTriplets.nPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); addPixelTripletToMemory(mds, segments, - tripletsInGPU, - pixelTripletsInGPU, + triplets, + pixelTriplets, pixelSegmentIndex, outerTripletIndex, pixelRadius, @@ -1050,7 +923,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { eta_pix, phi_pix, score); - tripletsInGPU.partOfPT3[outerTripletIndex] = true; + triplets.partOfPT3()[outerTripletIndex] = true; } } } // for outerTripletArrayIndex diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 1d506c11c3d63..2fbcd2a0c989f 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -3,145 +3,20 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" #include "RecoTracker/LSTCore/interface/EndcapGeometry.h" #include "NeuralNetwork.h" -#include "Segment.h" -#include "MiniDoublet.h" #include "Hit.h" #include "ObjectRanges.h" -#include "Triplet.h" +#include "Triplet.h" // FIXME: need to refactor common functions to a common place namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Quintuplets { - unsigned int* tripletIndices; - uint16_t* lowerModuleIndices; - unsigned int* nQuintuplets; - unsigned int* totOccupancyQuintuplets; - unsigned int* nMemoryLocations; - - FPX* innerRadius; - FPX* bridgeRadius; - FPX* outerRadius; - FPX* pt; - FPX* eta; - FPX* phi; - FPX* score_rphisum; - uint8_t* layer; - char* isDup; - bool* TightCutFlag; - bool* partOfPT5; - - float* regressionRadius; - float* regressionG; - float* regressionF; - - uint8_t* logicalLayers; - unsigned int* hitIndices; - float* rzChiSquared; - float* chiSquared; - float* nonAnchorChiSquared; - - template - void setData(TBuff& buf) { - tripletIndices = buf.tripletIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - nQuintuplets = buf.nQuintuplets_buf.data(); - totOccupancyQuintuplets = buf.totOccupancyQuintuplets_buf.data(); - nMemoryLocations = buf.nMemoryLocations_buf.data(); - innerRadius = buf.innerRadius_buf.data(); - bridgeRadius = buf.bridgeRadius_buf.data(); - outerRadius = buf.outerRadius_buf.data(); - pt = buf.pt_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); 
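// Both creation kernels above (CreatePixelTripletsFromMap and CreatePixelQuintupletsFromMap)
// guard against overflow with the same two-counter pattern: atomically bump the total
// occupancy first, and only claim a storage slot if the cap was not exceeded. Sketch of the
// pattern with a hypothetical helper (n_max_pixel_triplets is the cap used above):
template <typename TAcc>
ALPAKA_FN_ACC bool tryClaimPixelTripletSlot(TAcc const& acc, PixelTriplets pixelTriplets, unsigned int& slot) {
  unsigned int tot =
      alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{});
  if (tot >= n_max_pixel_triplets)
    return false;  // candidate is counted in the occupancy but not stored
  slot = alpaka::atomicAdd(acc, &pixelTriplets.nPixelTriplets(), 1u, alpaka::hierarchy::Threads{});
  return true;
}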
- score_rphisum = buf.score_rphisum_buf.data(); - layer = buf.layer_buf.data(); - isDup = buf.isDup_buf.data(); - TightCutFlag = buf.TightCutFlag_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - regressionRadius = buf.regressionRadius_buf.data(); - regressionG = buf.regressionG_buf.data(); - regressionF = buf.regressionF_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - rzChiSquared = buf.rzChiSquared_buf.data(); - chiSquared = buf.chiSquared_buf.data(); - nonAnchorChiSquared = buf.nonAnchorChiSquared_buf.data(); - } - }; - - template - struct QuintupletsBuffer { - Buf tripletIndices_buf; - Buf lowerModuleIndices_buf; - Buf nQuintuplets_buf; - Buf totOccupancyQuintuplets_buf; - Buf nMemoryLocations_buf; - - Buf innerRadius_buf; - Buf bridgeRadius_buf; - Buf outerRadius_buf; - Buf pt_buf; - Buf eta_buf; - Buf phi_buf; - Buf score_rphisum_buf; - Buf layer_buf; - Buf isDup_buf; - Buf TightCutFlag_buf; - Buf partOfPT5_buf; - - Buf regressionRadius_buf; - Buf regressionG_buf; - Buf regressionF_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf rzChiSquared_buf; - Buf chiSquared_buf; - Buf nonAnchorChiSquared_buf; - - Quintuplets data_; - - template - QuintupletsBuffer(unsigned int nTotalQuintuplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue) - : tripletIndices_buf(allocBufWrapper(devAccIn, 2 * nTotalQuintuplets, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_T5::kLayers * nTotalQuintuplets, queue)), - nQuintuplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - totOccupancyQuintuplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - innerRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - bridgeRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - outerRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - pt_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - eta_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - phi_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - score_rphisum_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - layer_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - isDup_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - TightCutFlag_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - regressionRadius_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - regressionG_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - regressionF_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, Params_T5::kLayers * nTotalQuintuplets, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, Params_T5::kHits * nTotalQuintuplets, queue)), - rzChiSquared_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - chiSquared_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)), - nonAnchorChiSquared_buf(allocBufWrapper(devAccIn, nTotalQuintuplets, queue)) { - alpaka::memset(queue, nQuintuplets_buf, 0u); - alpaka::memset(queue, totOccupancyQuintuplets_buf, 0u); - alpaka::memset(queue, isDup_buf, 0u); - alpaka::memset(queue, TightCutFlag_buf, false); - alpaka::memset(queue, partOfPT5_buf, false); - } - - inline Quintuplets const* data() const { return &data_; } - inline void setData(QuintupletsBuffer& buf) { data_.setData(buf); } - }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool 
checkIntervalOverlap(float firstMin, float firstMax, float secondMin, @@ -149,8 +24,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(Triplets const& tripletsInGPU, - Quintuplets& quintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(TripletsConst triplets, + Quintuplets quintuplets, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t lowerModule1, @@ -173,62 +48,46 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float scores, uint8_t layer, unsigned int quintupletIndex, - bool TightCutFlag) { - quintupletsInGPU.tripletIndices[2 * quintupletIndex] = innerTripletIndex; - quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1] = outerTripletIndex; - - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex] = lowerModule1; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1] = lowerModule2; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 2] = lowerModule3; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 3] = lowerModule4; - quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 4] = lowerModule5; - quintupletsInGPU.innerRadius[quintupletIndex] = __F2H(innerRadius); - quintupletsInGPU.outerRadius[quintupletIndex] = __F2H(outerRadius); - quintupletsInGPU.pt[quintupletIndex] = __F2H(pt); - quintupletsInGPU.eta[quintupletIndex] = __F2H(eta); - quintupletsInGPU.phi[quintupletIndex] = __F2H(phi); - quintupletsInGPU.score_rphisum[quintupletIndex] = __F2H(scores); - quintupletsInGPU.layer[quintupletIndex] = layer; - quintupletsInGPU.isDup[quintupletIndex] = 0; - quintupletsInGPU.TightCutFlag[quintupletIndex] = TightCutFlag; - quintupletsInGPU.regressionRadius[quintupletIndex] = regressionRadius; - quintupletsInGPU.regressionG[quintupletIndex] = regressionG; - quintupletsInGPU.regressionF[quintupletIndex] = regressionF; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 1] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex + 1]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 2] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex + 2]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 3] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * outerTripletIndex + 1]; - quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 4] = - tripletsInGPU.logicalLayers[Params_T3::kLayers * outerTripletIndex + 2]; - - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 1] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 1]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 2] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 2]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 3] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 3]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 4] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 4]; - 
quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 5] = - tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 5]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 6] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 2]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 7] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 3]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 8] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 4]; - quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 9] = - tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 5]; - quintupletsInGPU.bridgeRadius[quintupletIndex] = bridgeRadius; - quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared; - quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared; - quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared; + bool tightCutFlag) { + quintuplets.tripletIndices()[quintupletIndex][0] = innerTripletIndex; + quintuplets.tripletIndices()[quintupletIndex][1] = outerTripletIndex; + + quintuplets.lowerModuleIndices()[quintupletIndex][0] = lowerModule1; + quintuplets.lowerModuleIndices()[quintupletIndex][1] = lowerModule2; + quintuplets.lowerModuleIndices()[quintupletIndex][2] = lowerModule3; + quintuplets.lowerModuleIndices()[quintupletIndex][3] = lowerModule4; + quintuplets.lowerModuleIndices()[quintupletIndex][4] = lowerModule5; + quintuplets.innerRadius()[quintupletIndex] = __F2H(innerRadius); + quintuplets.outerRadius()[quintupletIndex] = __F2H(outerRadius); + quintuplets.pt()[quintupletIndex] = __F2H(pt); + quintuplets.eta()[quintupletIndex] = __F2H(eta); + quintuplets.phi()[quintupletIndex] = __F2H(phi); + quintuplets.score_rphisum()[quintupletIndex] = __F2H(scores); + quintuplets.isDup()[quintupletIndex] = 0; + quintuplets.tightCutFlag()[quintupletIndex] = tightCutFlag; + quintuplets.regressionRadius()[quintupletIndex] = regressionRadius; + quintuplets.regressionG()[quintupletIndex] = regressionG; + quintuplets.regressionF()[quintupletIndex] = regressionF; + quintuplets.logicalLayers()[quintupletIndex][0] = triplets.logicalLayers()[innerTripletIndex][0]; + quintuplets.logicalLayers()[quintupletIndex][1] = triplets.logicalLayers()[innerTripletIndex][1]; + quintuplets.logicalLayers()[quintupletIndex][2] = triplets.logicalLayers()[innerTripletIndex][2]; + quintuplets.logicalLayers()[quintupletIndex][3] = triplets.logicalLayers()[outerTripletIndex][1]; + quintuplets.logicalLayers()[quintupletIndex][4] = triplets.logicalLayers()[outerTripletIndex][2]; + + quintuplets.hitIndices()[quintupletIndex][0] = triplets.hitIndices()[innerTripletIndex][0]; + quintuplets.hitIndices()[quintupletIndex][1] = triplets.hitIndices()[innerTripletIndex][1]; + quintuplets.hitIndices()[quintupletIndex][2] = triplets.hitIndices()[innerTripletIndex][2]; + quintuplets.hitIndices()[quintupletIndex][3] = triplets.hitIndices()[innerTripletIndex][3]; + quintuplets.hitIndices()[quintupletIndex][4] = triplets.hitIndices()[innerTripletIndex][4]; + quintuplets.hitIndices()[quintupletIndex][5] = triplets.hitIndices()[innerTripletIndex][5]; + quintuplets.hitIndices()[quintupletIndex][6] = triplets.hitIndices()[outerTripletIndex][2]; + quintuplets.hitIndices()[quintupletIndex][7] = triplets.hitIndices()[outerTripletIndex][3]; + quintuplets.hitIndices()[quintupletIndex][8] = triplets.hitIndices()[outerTripletIndex][4]; + quintuplets.hitIndices()[quintupletIndex][9] = 
triplets.hitIndices()[outerTripletIndex][5]; + quintuplets.bridgeRadius()[quintupletIndex] = bridgeRadius; + quintuplets.rzChiSquared()[quintupletIndex] = rzChiSquared; + quintuplets.chiSquared()[quintupletIndex] = rPhiChiSquared; + quintuplets.nonAnchorChiSquared()[quintupletIndex] = nonAnchorChiSquared; } //90% constraint @@ -750,12 +609,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(Triplets const& tripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(TripletsConst triplets, SegmentsConst segments, unsigned int innerTripletIndex, unsigned int outerTripletIndex) { - unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; - unsigned int outerInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; + unsigned int innerOuterSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int outerInnerSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; unsigned int innerOuterOuterMiniDoubletIndex = segments.mdIndices()[innerOuterSegmentIndex][1]; //inner triplet outer segment outer MD index unsigned int outerInnerInnerMiniDoubletIndex = @@ -2161,7 +2020,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules& modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, - Triplets& tripletsInGPU, + TripletsConst triplets, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -2179,10 +2038,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& chiSquared, float& nonAnchorChiSquared, bool& TightCutFlag) { - unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex]; - unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; - unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; - unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex + 1]; + unsigned int firstSegmentIndex = triplets.segmentIndices()[innerTripletIndex][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int thirdSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[outerTripletIndex][1]; unsigned int innerOuterOuterMiniDoubletIndex = segments.mdIndices()[secondSegmentIndex][1]; //inner triplet outer segment outer MD index @@ -2307,11 +2166,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { computeErrorInRadius(acc, x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, y2Vec, outerRadiusMin2S, outerRadiusMax2S); float g, f; - outerRadius = tripletsInGPU.circleRadius[outerTripletIndex]; + outerRadius = triplets.radius()[outerTripletIndex]; bridgeRadius = computeRadiusFromThreeAnchorHits(acc, x2, y2, x3, y3, x4, y4, g, f); - innerRadius = tripletsInGPU.circleRadius[innerTripletIndex]; - g = tripletsInGPU.circleCenterX[innerTripletIndex]; - f = tripletsInGPU.circleCenterY[innerTripletIndex]; + innerRadius = triplets.radius()[innerTripletIndex]; + g = triplets.centerX()[innerTripletIndex]; + f = triplets.centerY()[innerTripletIndex]; #ifdef USE_RZCHI2 float inner_pt = 2 * k2Rinv1GeVf * innerRadius; @@ -2432,7 +2291,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, mds, segments, - tripletsInGPU, + triplets, xVec, yVec, mdIndices, @@ -2498,14 +2357,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; } - struct CreateQuintupletsInGPUv2 { + struct CreateQuintuplets { 
template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, - Triplets tripletsInGPU, - Quintuplets quintupletsInGPU, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + Quintuplets quintuplets, + QuintupletsOccupancy quintupletsOccupancy, ObjectRanges rangesInGPU, uint16_t nEligibleT5Modules) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -2524,18 +2385,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { else { continue; } - unsigned int nInnerTriplets = tripletsInGPU.nTriplets[lowerModule1]; + unsigned int nInnerTriplets = tripletsOccupancy.nTriplets()[lowerModule1]; for (unsigned int innerTripletArrayIndex = globalThreadIdx[1]; innerTripletArrayIndex < nInnerTriplets; innerTripletArrayIndex += gridThreadExtent[1]) { unsigned int innerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule1] + innerTripletArrayIndex; - uint16_t lowerModule2 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * innerTripletIndex + 1]; - uint16_t lowerModule3 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * innerTripletIndex + 2]; - unsigned int nOuterTriplets = tripletsInGPU.nTriplets[lowerModule3]; + uint16_t lowerModule2 = triplets.lowerModuleIndices()[innerTripletIndex][1]; + uint16_t lowerModule3 = triplets.lowerModuleIndices()[innerTripletIndex][2]; + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[lowerModule3]; for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; outerTripletArrayIndex += gridThreadExtent[2]) { unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule3] + outerTripletArrayIndex; - uint16_t lowerModule4 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * outerTripletIndex + 1]; - uint16_t lowerModule5 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * outerTripletIndex + 2]; + uint16_t lowerModule4 = triplets.lowerModuleIndices()[outerTripletIndex][1]; + uint16_t lowerModule5 = triplets.lowerModuleIndices()[outerTripletIndex][2]; float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, chiSquared, nonAnchorChiSquared; //required for making distributions @@ -2545,7 +2406,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, mds, segments, - tripletsInGPU, + triplets, lowerModule1, lowerModule2, lowerModule3, @@ -2566,14 +2427,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (success) { int totOccupancyQuintuplets = alpaka::atomicAdd( - acc, &quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + acc, &quintupletsOccupancy.totOccupancyQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); if (totOccupancyQuintuplets >= rangesInGPU.quintupletModuleOccupancy[lowerModule1]) { #ifdef WARNINGS printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); #endif } else { int quintupletModuleIndex = alpaka::atomicAdd( - acc, &quintupletsInGPU.nQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + acc, &quintupletsOccupancy.nQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); //this if statement should never get executed! 
if (rangesInGPU.quintupletModuleIndices[lowerModule1] == -1) { #ifdef WARNINGS @@ -2582,14 +2443,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else { unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[lowerModule1] + quintupletModuleIndex; - float phi = mds.anchorPhi()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * innerTripletIndex]] + float phi = mds.anchorPhi()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] [layer2_adjustment]]; - float eta = mds.anchorEta()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * innerTripletIndex]] + float eta = mds.anchorEta()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] [layer2_adjustment]]; float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; float scores = chiSquared + nonAnchorChiSquared; - addQuintupletToMemory(tripletsInGPU, - quintupletsInGPU, + addQuintupletToMemory(triplets, + quintuplets, innerTripletIndex, outerTripletIndex, lowerModule1, @@ -2614,8 +2475,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { quintupletIndex, TightCutFlag); - tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; - tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; } } } @@ -2625,11 +2486,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct CreateEligibleModulesListForQuintupletsGPU { + struct CreateEligibleModulesListForQuintuplets { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, - Triplets tripletsInGPU, + TripletsOccupancyConst tripletsOccupancy, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); @@ -2658,7 +2519,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { short module_subdets = modulesInGPU.subdets[i]; float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); - if (tripletsInGPU.nTriplets[i] == 0) + if (tripletsOccupancy.nTriplets()[i] == 0) continue; if (module_subdets == Barrel and module_layers >= 3) continue; @@ -2733,7 +2594,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, - Quintuplets quintupletsInGPU, + QuintupletsOccupancyConst quintupletsOccupancy, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); @@ -2743,13 +2604,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (quintupletsInGPU.nQuintuplets[i] == 0 or rangesInGPU.quintupletModuleIndices[i] == -1) { + if (quintupletsOccupancy.nQuintuplets()[i] == 0 or rangesInGPU.quintupletModuleIndices[i] == -1) { rangesInGPU.quintupletRanges[i * 2] = -1; rangesInGPU.quintupletRanges[i * 2 + 1] = -1; } else { rangesInGPU.quintupletRanges[i * 2] = rangesInGPU.quintupletModuleIndices[i]; rangesInGPU.quintupletRanges[i * 2 + 1] = - rangesInGPU.quintupletModuleIndices[i] + quintupletsInGPU.nQuintuplets[i] - 1; + rangesInGPU.quintupletModuleIndices[i] + quintupletsOccupancy.nQuintuplets()[i] - 1; } } } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 5ff4b7ad478cf..9631382573e06 100644 --- 
a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -3,15 +3,14 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" -#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" #include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" -#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" -#include "Triplet.h" -#include "Segment.h" -#include "MiniDoublet.h" -#include "PixelTriplet.h" -#include "Quintuplet.h" #include "Hit.h" #include "ObjectRanges.h" @@ -39,9 +38,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { short trackCandidateType, unsigned int innerTrackletIndex, unsigned int outerTrackletIndex, - uint8_t* logicalLayerIndices, - uint16_t* lowerModuleIndices, - unsigned int* hitIndices, + const uint8_t* logicalLayerIndices, + const uint16_t* lowerModuleIndices, + const unsigned int* hitIndices, int pixelSeedIndex, float centerX, float centerY, @@ -114,29 +113,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU, - PixelTriplets pixelTripletsInGPU, + PixelTriplets pixelTriplets, SegmentsPixelConst segmentsPixel, - PixelQuintuplets pixelQuintupletsInGPU) const { + PixelQuintupletsConst pixelQuintuplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets; pixelTripletIndex += gridThreadExtent[2]) { - if (pixelTripletsInGPU.isDup[pixelTripletIndex]) + if (pixelTriplets.isDup()[pixelTripletIndex]) continue; // Cross cleaning step - float eta1 = __H2F(pixelTripletsInGPU.eta_pix[pixelTripletIndex]); - float phi1 = __H2F(pixelTripletsInGPU.phi_pix[pixelTripletIndex]); + float eta1 = __H2F(pixelTriplets.eta_pix()[pixelTripletIndex]); + float phi1 = __H2F(pixelTriplets.phi_pix()[pixelTripletIndex]); int pixelModuleIndex = *modulesInGPU.nLowerModules; unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); for (unsigned int pixelQuintupletIndex = globalThreadIdx[1]; pixelQuintupletIndex < nPixelQuintuplets; pixelQuintupletIndex += gridThreadExtent[1]) { - unsigned int pLS_jx = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; + unsigned int pLS_jx = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; float eta2 = segmentsPixel.eta()[pLS_jx - prefix]; float phi2 = segmentsPixel.phi()[pLS_jx - prefix]; float dEta = alpaka::math::abs(acc, (eta1 - eta2)); @@ -144,7 +143,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-5f) - pixelTripletsInGPU.isDup[pixelTripletIndex] = true; + pixelTriplets.isDup()[pixelTripletIndex] = true; } } } @@ -154,9 +153,10 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, - Quintuplets quintupletsInGPU, - PixelQuintuplets pixelQuintupletsInGPU, - PixelTriplets pixelTripletsInGPU, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintupletsConst pixelQuintuplets, + PixelTripletsConst pixelTriplets, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -167,29 +167,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (rangesInGPU.quintupletModuleIndices[innerInnerInnerLowerModuleArrayIndex] == -1) continue; - unsigned int nQuints = quintupletsInGPU.nQuintuplets[innerInnerInnerLowerModuleArrayIndex]; + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[innerInnerInnerLowerModuleArrayIndex]; for (unsigned int innerObjectArrayIndex = globalThreadIdx[1]; innerObjectArrayIndex < nQuints; innerObjectArrayIndex += gridThreadExtent[1]) { unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[innerInnerInnerLowerModuleArrayIndex] + innerObjectArrayIndex; // Don't add duplicate T5s or T5s that are accounted in pT5s - if (quintupletsInGPU.isDup[quintupletIndex] or quintupletsInGPU.partOfPT5[quintupletIndex]) + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) continue; #ifdef Crossclean_T5 - unsigned int loop_bound = *pixelQuintupletsInGPU.nPixelQuintuplets + *pixelTripletsInGPU.nPixelTriplets; + unsigned int loop_bound = pixelQuintuplets.nPixelQuintuplets() + pixelTriplets.nPixelTriplets(); // Cross cleaning step - float eta1 = __H2F(quintupletsInGPU.eta[quintupletIndex]); - float phi1 = __H2F(quintupletsInGPU.phi[quintupletIndex]); + float eta1 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi1 = __H2F(quintuplets.phi()[quintupletIndex]); for (unsigned int jx = globalThreadIdx[2]; jx < loop_bound; jx += gridThreadExtent[2]) { float eta2, phi2; - if (jx < *pixelQuintupletsInGPU.nPixelQuintuplets) { - eta2 = __H2F(pixelQuintupletsInGPU.eta[jx]); - phi2 = __H2F(pixelQuintupletsInGPU.phi[jx]); + if (jx < pixelQuintuplets.nPixelQuintuplets()) { + eta2 = __H2F(pixelQuintuplets.eta()[jx]); + phi2 = __H2F(pixelQuintuplets.phi()[jx]); } else { - eta2 = __H2F(pixelTripletsInGPU.eta[jx - *pixelQuintupletsInGPU.nPixelQuintuplets]); - phi2 = __H2F(pixelTripletsInGPU.phi[jx - *pixelQuintupletsInGPU.nPixelQuintuplets]); + eta2 = __H2F(pixelTriplets.eta()[jx - pixelQuintuplets.nPixelQuintuplets()]); + phi2 = __H2F(pixelTriplets.phi()[jx - pixelQuintuplets.nPixelQuintuplets()]); } float dEta = alpaka::math::abs(acc, eta1 - eta2); @@ -197,7 +197,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) - quintupletsInGPU.isDup[quintupletIndex] = true; + quintuplets.isDup()[quintupletIndex] = true; } #endif } @@ -210,14 +210,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU, - PixelTriplets pixelTripletsInGPU, + PixelTripletsConst pixelTriplets, TrackCandidates cands, SegmentsConst segments, SegmentsOccupancyConst segmentsOccupancy, SegmentsPixel segmentsPixel, MiniDoubletsConst mds, Hits hitsInGPU, - Quintuplets quintupletsInGPU) const { + QuintupletsConst quintuplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -240,8 +240,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if 
(type == 4) // T5 { unsigned int quintupletIndex = innerTrackletIdx; // T5 index - float eta2 = __H2F(quintupletsInGPU.eta[quintupletIndex]); - float phi2 = __H2F(quintupletsInGPU.phi[quintupletIndex]); + float eta2 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi2 = __H2F(quintuplets.phi()[quintupletIndex]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); @@ -251,14 +251,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } if (type == 5) // pT3 { - int pLSIndex = pixelTripletsInGPU.pixelSegmentIndices[innerTrackletIdx]; + int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx]; int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsInGPU); if (npMatched > 0) segmentsPixel.isDup()[pixelArrayIndex] = true; int pT3Index = innerTrackletIdx; - float eta2 = __H2F(pixelTripletsInGPU.eta_pix[pT3Index]); - float phi2 = __H2F(pixelTripletsInGPU.phi_pix[pT3Index]); + float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]); + float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); @@ -288,11 +288,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct AddpT3asTrackCandidatesInGPU { + struct AddpT3asTrackCandidates { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - PixelTriplets pixelTripletsInGPU, + PixelTripletsConst pixelTriplets, TrackCandidates cands, SegmentsPixelConst segmentsPixel, ObjectRanges rangesInGPU) const { @@ -303,11 +303,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; pixelTripletIndex += gridThreadExtent[0]) { - if ((pixelTripletsInGPU.isDup[pixelTripletIndex])) + if ((pixelTriplets.isDup()[pixelTripletIndex])) continue; unsigned int trackCandidateIdx = @@ -323,19 +323,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else { alpaka::atomicAdd(acc, &cands.nTrackCandidatespT3(), 1u, alpaka::hierarchy::Threads{}); - float radius = 0.5f * (__H2F(pixelTripletsInGPU.pixelRadius[pixelTripletIndex]) + - __H2F(pixelTripletsInGPU.tripletRadius[pixelTripletIndex])); - unsigned int pT3PixelIndex = pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex]; + float radius = 0.5f * (__H2F(pixelTriplets.pixelRadius()[pixelTripletIndex]) + + __H2F(pixelTriplets.tripletRadius()[pixelTripletIndex])); + unsigned int pT3PixelIndex = pixelTriplets.pixelSegmentIndices()[pixelTripletIndex]; addTrackCandidateToMemory(cands, 5 /*track candidate type pT3=5*/, pixelTripletIndex, pixelTripletIndex, - &pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex], - &pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex], - &pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex], + pixelTriplets.logicalLayers()[pixelTripletIndex].data(), + pixelTriplets.lowerModuleIndices()[pixelTripletIndex].data(), + pixelTriplets.hitIndices()[pixelTripletIndex].data(), segmentsPixel.seedIdx()[pT3PixelIndex - pLS_offset], - __H2F(pixelTripletsInGPU.centerX[pixelTripletIndex]), - __H2F(pixelTripletsInGPU.centerY[pixelTripletIndex]), + 
__H2F(pixelTriplets.centerX()[pixelTripletIndex]), + __H2F(pixelTriplets.centerY()[pixelTripletIndex]), radius, trackCandidateIdx, pixelTripletIndex); @@ -344,11 +344,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct AddT5asTrackCandidateInGPU { + struct AddT5asTrackCandidate { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - Quintuplets quintupletsInGPU, + QuintupletsConst quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, TrackCandidates cands, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -358,12 +359,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (rangesInGPU.quintupletModuleIndices[idx] == -1) continue; - unsigned int nQuints = quintupletsInGPU.nQuintuplets[idx]; + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[idx]; for (unsigned int jdx = globalThreadIdx[2]; jdx < nQuints; jdx += gridThreadExtent[2]) { unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[idx] + jdx; - if (quintupletsInGPU.isDup[quintupletIndex] or quintupletsInGPU.partOfPT5[quintupletIndex]) + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) continue; - if (!(quintupletsInGPU.TightCutFlag[quintupletIndex])) + if (!(quintuplets.tightCutFlag()[quintupletIndex])) continue; unsigned int trackCandidateIdx = @@ -382,13 +383,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { 4 /*track candidate type T5=4*/, quintupletIndex, quintupletIndex, - &quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex], - &quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex], - &quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex], + quintuplets.logicalLayers()[quintupletIndex].data(), + quintuplets.lowerModuleIndices()[quintupletIndex].data(), + quintuplets.hitIndices()[quintupletIndex].data(), -1 /*no pixel seed index for T5s*/, - quintupletsInGPU.regressionG[quintupletIndex], - quintupletsInGPU.regressionF[quintupletIndex], - quintupletsInGPU.regressionRadius[quintupletIndex], + quintuplets.regressionG()[quintupletIndex], + quintuplets.regressionF()[quintupletIndex], + quintuplets.regressionRadius()[quintupletIndex], trackCandidateIdx, quintupletIndex); } @@ -438,11 +439,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; - struct AddpT5asTrackCandidateInGPU { + struct AddpT5asTrackCandidate { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - PixelQuintuplets pixelQuintupletsInGPU, + PixelQuintupletsConst pixelQuintuplets, TrackCandidates cands, SegmentsPixelConst segmentsPixel, ObjectRanges rangesInGPU) const { @@ -453,11 +454,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; pixelQuintupletIndex += gridThreadExtent[0]) { - if (pixelQuintupletsInGPU.isDup[pixelQuintupletIndex]) + if (pixelQuintuplets.isDup()[pixelQuintupletIndex]) continue; unsigned int trackCandidateIdx = @@ -473,23 +474,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else { alpaka::atomicAdd(acc, &cands.nTrackCandidatespT5(), 1u, alpaka::hierarchy::Threads{}); - float radius = 0.5f * 
(__H2F(pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex]) + - __H2F(pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex])); - unsigned int pT5PixelIndex = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; - addTrackCandidateToMemory( - cands, - 7 /*track candidate type pT5=7*/, - pT5PixelIndex, - pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex], - &pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex], - &pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex], - &pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex], - segmentsPixel.seedIdx()[pT5PixelIndex - pLS_offset], - __H2F(pixelQuintupletsInGPU.centerX[pixelQuintupletIndex]), - __H2F(pixelQuintupletsInGPU.centerY[pixelQuintupletIndex]), - radius, - trackCandidateIdx, - pixelQuintupletIndex); + float radius = 0.5f * (__H2F(pixelQuintuplets.pixelRadius()[pixelQuintupletIndex]) + + __H2F(pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex])); + unsigned int pT5PixelIndex = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; + addTrackCandidateToMemory(cands, + 7 /*track candidate type pT5=7*/, + pT5PixelIndex, + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex], + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex].data(), + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex].data(), + pixelQuintuplets.hitIndices()[pixelQuintupletIndex].data(), + segmentsPixel.seedIdx()[pT5PixelIndex - pLS_offset], + __H2F(pixelQuintuplets.centerX()[pixelQuintupletIndex]), + __H2F(pixelQuintuplets.centerY()[pixelQuintupletIndex]), + radius, + trackCandidateIdx, + pixelQuintupletIndex); } } } diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 3c8b4cddbe4ab..b5d4ed615ca68 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -5,6 +5,7 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" #include "Segment.h" #include "MiniDoublet.h" @@ -12,195 +13,57 @@ #include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Triplets { - unsigned int* segmentIndices; - uint16_t* lowerModuleIndices; //3 of them - unsigned int* nTriplets; - unsigned int* totOccupancyTriplets; - unsigned int* nMemoryLocations; - uint8_t* logicalLayers; - unsigned int* hitIndices; - FPX* betaIn; - float* circleRadius; - float* circleCenterX; - float* circleCenterY; - bool* partOfPT5; - bool* partOfT5; - bool* partOfPT3; -#ifdef CUT_VALUE_DEBUG - //debug variables - float* zOut; - float* rtOut; - float* betaInCut; -#endif - template - void setData(TBuff& buf) { - segmentIndices = buf.segmentIndices_buf.data(); - lowerModuleIndices = buf.lowerModuleIndices_buf.data(); - nTriplets = buf.nTriplets_buf.data(); - totOccupancyTriplets = buf.totOccupancyTriplets_buf.data(); - nMemoryLocations = buf.nMemoryLocations_buf.data(); - logicalLayers = buf.logicalLayers_buf.data(); - hitIndices = buf.hitIndices_buf.data(); - betaIn = buf.betaIn_buf.data(); - circleRadius = buf.circleRadius_buf.data(); - circleCenterX = buf.circleCenterX_buf.data(); - circleCenterY = buf.circleCenterY_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - partOfT5 = buf.partOfT5_buf.data(); - partOfPT3 = buf.partOfPT3_buf.data(); -#ifdef CUT_VALUE_DEBUG - zOut = buf.zOut_buf.data(); - rtOut = buf.rtOut_buf.data(); - betaInCut = buf.betaInCut_buf.data(); 
-#endif - } - }; - - template - struct TripletsBuffer { - Buf segmentIndices_buf; - Buf lowerModuleIndices_buf; - Buf nTriplets_buf; - Buf totOccupancyTriplets_buf; - Buf nMemoryLocations_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf betaIn_buf; - Buf circleRadius_buf; - Buf circleCenterX_buf; - Buf circleCenterY_buf; - Buf partOfPT5_buf; - Buf partOfT5_buf; - Buf partOfPT3_buf; - -#ifdef CUT_VALUE_DEBUG - Buf zOut_buf; - Buf rtOut_buf; - Buf deltaPhiPos_buf; - Buf deltaPhi_buf; - Buf zLo_buf; - Buf zHi_buf; - Buf zLoPointed_buf; - Buf zHiPointed_buf; - Buf dPhiCut_buf; - Buf betaInCut_buf; - Buf rtLo_buf; - Buf rtHi_buf; -#endif - - Triplets data_; - - template - TripletsBuffer(unsigned int maxTriplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue) - : segmentIndices_buf(allocBufWrapper(devAccIn, 2 * maxTriplets, queue)), - lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_T3::kLayers * maxTriplets, queue)), - nTriplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - totOccupancyTriplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), - nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - logicalLayers_buf(allocBufWrapper(devAccIn, maxTriplets * Params_T3::kLayers, queue)), - hitIndices_buf(allocBufWrapper(devAccIn, maxTriplets * Params_T3::kHits, queue)), - betaIn_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - circleRadius_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - circleCenterX_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - circleCenterY_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - partOfT5_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - partOfPT3_buf(allocBufWrapper(devAccIn, maxTriplets, queue)) -#ifdef CUT_VALUE_DEBUG - , - zOut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - rtOut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - deltaPhiPos_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - deltaPhi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zLo_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zHi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zLoPointed_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - zHiPointed_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - dPhiCut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - betaInCut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - rtLo_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), - rtHi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)) -#endif - { - alpaka::memset(queue, nTriplets_buf, 0u); - alpaka::memset(queue, totOccupancyTriplets_buf, 0u); - alpaka::memset(queue, partOfPT5_buf, false); - alpaka::memset(queue, partOfT5_buf, false); - alpaka::memset(queue, partOfPT3_buf, false); - } - - inline Triplets const* data() const { return &data_; } - inline void setData(TripletsBuffer& buf) { data_.setData(buf); } - }; - -#ifdef CUT_VALUE_DEBUG ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, - Triplets& tripletsInGPU, + Triplets& triplets, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG float zOut, float rtOut, +#endif float betaIn, float betaInCut, float circleRadius, float circleCenterX, float circleCenterY, - unsigned int tripletIndex) -#else - ALPAKA_FN_ACC ALPAKA_FN_INLINE void 
addTripletToMemory(Modules const& modulesInGPU, - MiniDoubletsConst mds, - SegmentsConst segments, - Triplets& tripletsInGPU, - unsigned int innerSegmentIndex, - unsigned int outerSegmentIndex, - uint16_t innerInnerLowerModuleIndex, - uint16_t middleLowerModuleIndex, - uint16_t outerOuterLowerModuleIndex, - float betaIn, - float circleRadius, - float circleCenterX, - float circleCenterY, - unsigned int tripletIndex) -#endif - { - tripletsInGPU.segmentIndices[tripletIndex * 2] = innerSegmentIndex; - tripletsInGPU.segmentIndices[tripletIndex * 2 + 1] = outerSegmentIndex; - tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers] = innerInnerLowerModuleIndex; - tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers + 1] = middleLowerModuleIndex; - tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers + 2] = outerOuterLowerModuleIndex; - - tripletsInGPU.betaIn[tripletIndex] = __F2H(betaIn); - tripletsInGPU.circleRadius[tripletIndex] = circleRadius; - tripletsInGPU.circleCenterX[tripletIndex] = circleCenterX; - tripletsInGPU.circleCenterY[tripletIndex] = circleCenterY; - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers] = + unsigned int tripletIndex) { + triplets.segmentIndices()[tripletIndex][0] = innerSegmentIndex; + triplets.segmentIndices()[tripletIndex][1] = outerSegmentIndex; + triplets.lowerModuleIndices()[tripletIndex][0] = innerInnerLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][1] = middleLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][2] = outerOuterLowerModuleIndex; + + triplets.betaIn()[tripletIndex] = __F2H(betaIn); + triplets.radius()[tripletIndex] = circleRadius; + triplets.centerX()[tripletIndex] = circleCenterX; + triplets.centerY()[tripletIndex] = circleCenterY; + triplets.logicalLayers()[tripletIndex][0] = modulesInGPU.layers[innerInnerLowerModuleIndex] + (modulesInGPU.subdets[innerInnerLowerModuleIndex] == 4) * 6; - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 1] = + triplets.logicalLayers()[tripletIndex][1] = modulesInGPU.layers[middleLowerModuleIndex] + (modulesInGPU.subdets[middleLowerModuleIndex] == 4) * 6; - tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2] = + triplets.logicalLayers()[tripletIndex][2] = modulesInGPU.layers[outerOuterLowerModuleIndex] + (modulesInGPU.subdets[outerOuterLowerModuleIndex] == 4) * 6; //get the hits unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits] = mds.anchorHitIndices()[firstMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 1] = mds.outerHitIndices()[firstMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 2] = mds.anchorHitIndices()[secondMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 3] = mds.outerHitIndices()[secondMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 4] = mds.anchorHitIndices()[thirdMDIndex]; - tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 5] = mds.outerHitIndices()[thirdMDIndex]; + triplets.hitIndices()[tripletIndex][0] = mds.anchorHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][1] = mds.outerHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][2] = mds.anchorHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][3] = 
mds.outerHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][4] = mds.anchorHitIndices()[thirdMDIndex]; + triplets.hitIndices()[tripletIndex][5] = mds.outerHitIndices()[thirdMDIndex]; #ifdef CUT_VALUE_DEBUG - tripletsInGPU.zOut[tripletIndex] = zOut; - tripletsInGPU.rtOut[tripletIndex] = rtOut; - tripletsInGPU.betaInCut[tripletIndex] = betaInCut; + triplets.zOut()[tripletIndex] = zOut; + triplets.rtOut()[tripletIndex] = rtOut; + triplets.betaInCut()[tripletIndex] = betaInCut; #endif } @@ -798,14 +661,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return true; } - struct CreateTripletsInGPUv2 { + struct CreateTriplets { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, MiniDoubletsConst mds, SegmentsConst segments, SegmentsOccupancyConst segmentsOccupancy, - Triplets tripletsInGPU, + Triplets triplets, + TripletsOccupancy tripletsOccupancy, ObjectRanges rangesInGPU, uint16_t* index_gpu, uint16_t nonZeroModules) const { @@ -861,7 +725,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (success) { unsigned int totOccupancyTriplets = alpaka::atomicAdd(acc, - &tripletsInGPU.totOccupancyTriplets[innerInnerLowerModuleIndex], + &tripletsOccupancy.totOccupancyTriplets()[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); if (static_cast(totOccupancyTriplets) >= @@ -871,43 +735,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #endif } else { unsigned int tripletModuleIndex = alpaka::atomicAdd( - acc, &tripletsInGPU.nTriplets[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + acc, &tripletsOccupancy.nTriplets()[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); unsigned int tripletIndex = rangesInGPU.tripletModuleIndices[innerInnerLowerModuleIndex] + tripletModuleIndex; -#ifdef CUT_VALUE_DEBUG addTripletToMemory(modulesInGPU, mds, segments, - tripletsInGPU, + triplets, innerSegmentIndex, outerSegmentIndex, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG zOut, rtOut, +#endif betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY, tripletIndex); -#else - addTripletToMemory(modulesInGPU, - mds, - segments, - tripletsInGPU, - innerSegmentIndex, - outerSegmentIndex, - innerInnerLowerModuleIndex, - middleLowerModuleIndex, - outerOuterLowerModuleIndex, - betaIn, - circleRadius, - circleCenterX, - circleCenterY, - tripletIndex); -#endif } } } @@ -1023,7 +872,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, - Triplets tripletsInGPU, + TripletsOccupancyConst tripletsOccupancy, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); @@ -1033,12 +882,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (tripletsInGPU.nTriplets[i] == 0) { + if (tripletsOccupancy.nTriplets()[i] == 0) { rangesInGPU.tripletRanges[i * 2] = -1; rangesInGPU.tripletRanges[i * 2 + 1] = -1; } else { rangesInGPU.tripletRanges[i * 2] = rangesInGPU.tripletModuleIndices[i]; - rangesInGPU.tripletRanges[i * 2 + 1] = rangesInGPU.tripletModuleIndices[i] + tripletsInGPU.nTriplets[i] - 1; + rangesInGPU.tripletRanges[i * 2 + 1] = + rangesInGPU.tripletModuleIndices[i] + tripletsOccupancy.nTriplets()[i] - 1; } } } diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc 
b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index 4c325a7be72e4..0fd216621b2e2 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -117,11 +117,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromT3(Event* event, unsigned int T3) { - Triplets const* triplets = event->getTriplets().data(); - unsigned int LS_1 = triplets->segmentIndices[2 * T3]; - unsigned int LS_2 = triplets->segmentIndices[2 * T3 + 1]; - return {LS_1, LS_2}; +std::vector getLSsFromT3(Event* event, unsigned int t3) { + auto const triplets = event->getTriplets(); + unsigned int ls_1 = triplets.segmentIndices()[t3][0]; + unsigned int ls_2 = triplets.segmentIndices()[t3][1]; + return {ls_1, ls_2}; } //____________________________________________________________________________________________ @@ -152,11 +152,11 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getT3sFromT5(Event* event, unsigned int T5) { - Quintuplets const* quintuplets = event->getQuintuplets().data(); - unsigned int T3_1 = quintuplets->tripletIndices[2 * T5]; - unsigned int T3_2 = quintuplets->tripletIndices[2 * T5 + 1]; - return {T3_1, T3_2}; +std::vector getT3sFromT5(Event* event, unsigned int t5) { + auto const quintuplets = event->getQuintuplets(); + unsigned int t3_1 = quintuplets.tripletIndices()[t5][0]; + unsigned int t3_2 = quintuplets.tripletIndices()[t5][1]; + return {t3_1, t3_2}; } //____________________________________________________________________________________________ @@ -225,17 +225,17 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ unsigned int getPixelLSFrompT3(Event* event, unsigned int pT3) { - PixelTriplets const* pixelTriplets = event->getPixelTriplets().data(); + auto const pixelTriplets = event->getPixelTriplets(); ObjectRanges const* rangesEvt = event->getRanges().data(); Modules const* modulesEvt = event->getModules().data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; - return pixelTriplets->pixelSegmentIndices[pT3] - pLS_offset; + return pixelTriplets.pixelSegmentIndices()[pT3] - pLS_offset; } //____________________________________________________________________________________________ unsigned int getT3FrompT3(Event* event, unsigned int pT3) { - PixelTriplets const* pixelTriplets = event->getPixelTriplets().data(); - return pixelTriplets->tripletIndices[pT3]; + auto const pixelTriplets = event->getPixelTriplets(); + return pixelTriplets.tripletIndices()[pT3]; } //____________________________________________________________________________________________ @@ -314,17 +314,17 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ unsigned int getPixelLSFrompT5(Event* event, unsigned int pT5) { - PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); + auto const pixelQuintuplets = event->getPixelQuintuplets(); ObjectRanges const* rangesEvt = event->getRanges().data(); Modules const* modulesEvt = event->getModules().data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; - return 
pixelQuintuplets->pixelIndices[pT5] - pLS_offset; + return pixelQuintuplets.pixelSegmentIndices()[pT5] - pLS_offset; } //____________________________________________________________________________________________ unsigned int getT5FrompT5(Event* event, unsigned int pT5) { - PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); - return pixelQuintuplets->T5Indices[pT5]; + auto const pixelQuintuplets = event->getPixelQuintuplets(); + return pixelQuintuplets.quintupletIndices()[pT5]; } //____________________________________________________________________________________________ @@ -412,7 +412,7 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ std::vector getLSsFromTC(Event* event, unsigned int iTC) { // Get the type of the track candidate - auto const& trackCandidates = event->getTrackCandidates().const_view(); + auto const& trackCandidates = event->getTrackCandidates(); short type = trackCandidates.trackCandidateType()[iTC]; unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { @@ -435,7 +435,7 @@ std::vector getLSsFromTC(Event* event, unsigned int iTC) { std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(Event* event, unsigned iTC) { // Get the type of the track candidate - auto const& trackCandidates = event->getTrackCandidates().const_view(); + auto const& trackCandidates = event->getTrackCandidates(); short type = trackCandidates.trackCandidateType()[iTC]; unsigned int objidx = trackCandidates.directObjectIndices()[iTC]; switch (type) { diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index abcb324be0dc9..4b6ad8d838bfc 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -226,7 +226,7 @@ void setOutputBranches(Event* event) { std::vector> tc_matched_simIdx; // ============ Track candidates ============= - auto const& trackCandidates = event->getTrackCandidates().const_view(); + auto const& trackCandidates = event->getTrackCandidates(); unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Compute reco quantities of track candidate based on final object @@ -291,21 +291,20 @@ void setOptionalOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setPixelQuintupletOutputBranches(Event* event) { // ============ pT5 ============= - PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); - Quintuplets const* quintuplets = event->getQuintuplets().data(); - SegmentsPixelConst segmentsPixel = event->getSegments(); + auto const pixelQuintuplets = event->getPixelQuintuplets(); + auto const quintuplets = event->getQuintuplets(); + auto const segmentsPixel = event->getSegments(); Modules const* modules = event->getModules().data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); - unsigned int nPixelQuintuplets = - *pixelQuintuplets->nPixelQuintuplets; // size of this nPixelTriplets array is 1 (NOTE: parallelism lost here.) 
+ unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); std::vector sim_pT5_matched(n_accepted_simtrk); std::vector> pT5_matched_simIdx; for (unsigned int pT5 = 0; pT5 < nPixelQuintuplets; pT5++) { unsigned int T5Index = getT5FrompT5(event, pT5); unsigned int pLSIndex = getPixelLSFrompT5(event, pT5); - float pt = (__H2F(quintuplets->innerRadius[T5Index]) * k2Rinv1GeVf * 2 + segmentsPixel.ptIn()[pLSIndex]) / 2; + float pt = (__H2F(quintuplets.innerRadius()[T5Index]) * k2Rinv1GeVf * 2 + segmentsPixel.ptIn()[pLSIndex]) / 2; float eta = segmentsPixel.eta()[pLSIndex]; float phi = segmentsPixel.phi()[pLSIndex]; @@ -366,7 +365,8 @@ void setPixelQuintupletOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setQuintupletOutputBranches(Event* event) { - Quintuplets const* quintuplets = event->getQuintuplets().data(); + auto const quintuplets = event->getQuintuplets(); + auto const quintupletsOccupancy = event->getQuintuplets(); ObjectRanges const* ranges = event->getRanges().data(); Modules const* modules = event->getModules().data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); @@ -375,12 +375,12 @@ void setQuintupletOutputBranches(Event* event) { std::vector> t5_matched_simIdx; for (unsigned int lowerModuleIdx = 0; lowerModuleIdx < *(modules->nLowerModules); ++lowerModuleIdx) { - int nQuintuplets = quintuplets->nQuintuplets[lowerModuleIdx]; + int nQuintuplets = quintupletsOccupancy.nQuintuplets()[lowerModuleIdx]; for (unsigned int idx = 0; idx < nQuintuplets; idx++) { unsigned int quintupletIndex = ranges->quintupletModuleIndices[lowerModuleIdx] + idx; - float pt = __H2F(quintuplets->innerRadius[quintupletIndex]) * k2Rinv1GeVf * 2; - float eta = __H2F(quintuplets->eta[quintupletIndex]); - float phi = __H2F(quintuplets->phi[quintupletIndex]); + float pt = __H2F(quintuplets.innerRadius()[quintupletIndex]) * k2Rinv1GeVf * 2; + float eta = __H2F(quintuplets.eta()[quintupletIndex]); + float phi = __H2F(quintuplets.phi()[quintupletIndex]); std::vector hit_idx = getHitIdxsFromT5(event, quintupletIndex); std::vector hit_type = getHitTypesFromT5(event, quintupletIndex); @@ -399,11 +399,11 @@ void setQuintupletOutputBranches(Event* event) { ana.tx->pushbackToBranch("t5_pt", pt); ana.tx->pushbackToBranch("t5_eta", eta); ana.tx->pushbackToBranch("t5_phi", phi); - ana.tx->pushbackToBranch("t5_innerRadius", __H2F(quintuplets->innerRadius[quintupletIndex])); - ana.tx->pushbackToBranch("t5_bridgeRadius", __H2F(quintuplets->bridgeRadius[quintupletIndex])); - ana.tx->pushbackToBranch("t5_outerRadius", __H2F(quintuplets->outerRadius[quintupletIndex])); - ana.tx->pushbackToBranch("t5_chiSquared", quintuplets->chiSquared[quintupletIndex]); - ana.tx->pushbackToBranch("t5_rzChiSquared", quintuplets->rzChiSquared[quintupletIndex]); + ana.tx->pushbackToBranch("t5_innerRadius", __H2F(quintuplets.innerRadius()[quintupletIndex])); + ana.tx->pushbackToBranch("t5_bridgeRadius", __H2F(quintuplets.bridgeRadius()[quintupletIndex])); + ana.tx->pushbackToBranch("t5_outerRadius", __H2F(quintuplets.outerRadius()[quintupletIndex])); + ana.tx->pushbackToBranch("t5_chiSquared", quintuplets.chiSquared()[quintupletIndex]); + ana.tx->pushbackToBranch("t5_rzChiSquared", quintuplets.rzChiSquared()[quintupletIndex]); ana.tx->pushbackToBranch("t5_layer_binary", layer_binary); ana.tx->pushbackToBranch("t5_moduleType_binary", moduleType_binary); @@ -437,12 +437,12 @@ void 
setQuintupletOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setPixelTripletOutputBranches(Event* event) { - PixelTriplets const* pixelTriplets = event->getPixelTriplets().data(); + auto const pixelTriplets = event->getPixelTriplets(); Modules const* modules = event->getModules().data(); SegmentsPixelConst segmentsPixel = event->getSegments(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); - unsigned int nPixelTriplets = *pixelTriplets->nPixelTriplets; + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); std::vector sim_pT3_matched(n_accepted_simtrk); std::vector> pT3_matched_simIdx; @@ -506,7 +506,7 @@ void setGnnNtupleBranches(Event* event) { Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); ObjectRanges const* ranges = event->getRanges().data(); - auto const& trackCandidates = event->getTrackCandidates().const_view(); + auto const& trackCandidates = event->getTrackCandidates(); std::set mds_used_in_sg; std::map md_index_map; @@ -710,7 +710,7 @@ void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { //________________________________________________________________________________________________________________________________ std::tuple> parseTrackCandidate(Event* event, unsigned int idx) { // Get the type of the track candidate - auto const& trackCandidates = event->getTrackCandidates().const_view(); + auto const& trackCandidates = event->getTrackCandidates(); short type = trackCandidates.trackCandidateType()[idx]; enum { pT5 = 7, pT3 = 5, T5 = 4, pLS = 8 }; @@ -744,9 +744,9 @@ std::tuple> parseTrackCandidate( std::tuple, std::vector> parsepT5(Event* event, unsigned int idx) { // Get relevant information - auto const& trackCandidates = event->getTrackCandidates().const_view(); - Quintuplets const* quintuplets = event->getQuintuplets().data(); - SegmentsPixelConst segmentsPixel = event->getSegments(); + auto const trackCandidates = event->getTrackCandidates(); + auto const quintuplets = event->getQuintuplets(); + auto const segmentsPixel = event->getSegments(); // // pictorial representation of a pT5 @@ -842,7 +842,7 @@ std::tuple, std::vectorinnerRadius[T5Index]) * 2 * k2Rinv1GeVf; + float pt_T5 = __H2F(quintuplets.innerRadius()[T5Index]) * 2 * k2Rinv1GeVf; const float pt = (pt_T5 + pt_pLS) / 2; // Form the hit idx/type std::vector @@ -856,9 +856,9 @@ std::tuple, std::vector, std::vector> parsepT3(Event* event, unsigned int idx) { // Get relevant information - auto const& trackCandidates = event->getTrackCandidates().const_view(); - Triplets const* triplets = event->getTriplets().data(); - SegmentsPixelConst segmentsPixel = event->getSegments(); + auto const trackCandidates = event->getTrackCandidates(); + auto const triplets = event->getTriplets(); + auto const segmentsPixel = event->getSegments(); // // pictorial representation of a pT3 @@ -875,7 +875,7 @@ std::tuple, std::vectorcircleRadius[T3] * 2 * k2Rinv1GeVf; + float pt_T3 = triplets.radius()[T3] * 2 * k2Rinv1GeVf; // average pt const float pt = (pt_pLS + pt_T3) / 2; @@ -890,8 +890,8 @@ std::tuple, std::vector, std::vector> parseT5(Event* event, unsigned int idx) { - auto const& trackCandidates = event->getTrackCandidates().const_view(); - Quintuplets const* quintuplets = event->getQuintuplets().data(); + auto const trackCandidates = event->getTrackCandidates(); + auto const quintuplets = event->getQuintuplets(); unsigned 
int T5 = trackCandidates.directObjectIndices()[idx]; std::vector hits = getHitsFromT5(event, T5); @@ -907,7 +907,7 @@ std::tuple, std::vectorinnerRadius[T5] * k2Rinv1GeVf * 2; + const float pt = quintuplets.innerRadius()[T5] * k2Rinv1GeVf * 2; // T5 eta and phi are computed using outer and innermost hits lst_math::Hit hitA(trk.ph2_x()[Hit_0], trk.ph2_y()[Hit_0], trk.ph2_z()[Hit_0]); @@ -924,7 +924,7 @@ std::tuple, std::vector, std::vector> parsepLS(Event* event, unsigned int idx) { - auto const& trackCandidates = event->getTrackCandidates().const_view(); + auto const& trackCandidates = event->getTrackCandidates(); SegmentsPixelConst segmentsPixel = event->getSegments(); // Getting pLS index @@ -1073,7 +1073,8 @@ void printpLSs(Event* event) { //________________________________________________________________________________________________________________________________ void printT3s(Event* event) { - Triplets const* triplets = event->getTriplets().data(); + auto const triplets = event->getTriplets(); + auto const tripletsOccupancy = event->getTriplets(); SegmentsConst segments = event->getSegments(); MiniDoubletsConst miniDoublets = event->getMiniDoublets(); Hits const* hitsEvt = event->getHits().data(); @@ -1081,12 +1082,12 @@ void printT3s(Event* event) { int nTriplets = 0; for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { // unsigned int idx = modules->lowerModuleIndices[i]; - nTriplets += triplets->nTriplets[i]; + nTriplets += tripletsOccupancy.nTriplets()[i]; unsigned int idx = i; - for (unsigned int jdx = 0; jdx < triplets->nTriplets[idx]; jdx++) { + for (unsigned int jdx = 0; jdx < tripletsOccupancy.nTriplets()[idx]; jdx++) { unsigned int tpIdx = idx * 5000 + jdx; - unsigned int InnerSegmentIndex = triplets->segmentIndices[2 * tpIdx]; - unsigned int OuterSegmentIndex = triplets->segmentIndices[2 * tpIdx + 1]; + unsigned int InnerSegmentIndex = triplets.segmentIndices()[tpIdx][0]; + unsigned int OuterSegmentIndex = triplets.segmentIndices()[tpIdx][1]; unsigned int InnerSegmentInnerMiniDoubletIndex = segments.mdIndices()[InnerSegmentIndex][0]; unsigned int InnerSegmentOuterMiniDoubletIndex = segments.mdIndices()[InnerSegmentIndex][1]; unsigned int OuterSegmentOuterMiniDoubletIndex = segments.mdIndices()[OuterSegmentIndex][1];
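For orientation, the common thread in the hunks above is an indexing change: flat per-event buffers addressed with manual stride arithmetic (for example quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 3]) are replaced by SoA views whose generated accessors return one fixed-size row per object (quintuplets.hitIndices()[quintupletIndex][3]). The sketch below illustrates only that indexing change with a self-contained mock; FlatQuintuplets, RowQuintuplets, and kHitsPerT5 are invented names for the example and are not part of the real LST SoA classes.

#include <array>
#include <cassert>
#include <vector>

constexpr int kHitsPerT5 = 10;  // same hit count per T5 as Params_T5::kHits in the diff above

// Old-style storage: one flat buffer, the caller supplies the stride.
struct FlatQuintuplets {
  std::vector<unsigned int> hitIndices;  // size = kHitsPerT5 * nQuintuplets
  unsigned int hit(unsigned int q, int slot) const { return hitIndices[kHitsPerT5 * q + slot]; }
};

// New-style storage: one fixed-size row per object, indexed as [object][slot],
// mirroring accessors such as quintuplets.hitIndices()[quintupletIndex][slot].
struct RowQuintuplets {
  std::vector<std::array<unsigned int, kHitsPerT5>> hitIndices;  // size = nQuintuplets
  unsigned int hit(unsigned int q, int slot) const { return hitIndices[q][slot]; }
};

int main() {
  FlatQuintuplets flat{std::vector<unsigned int>(2 * kHitsPerT5, 0u)};
  RowQuintuplets rows{std::vector<std::array<unsigned int, kHitsPerT5>>(2)};
  flat.hitIndices[kHitsPerT5 * 1 + 3] = 42u;  // old: manual stride arithmetic
  rows.hitIndices[1][3] = 42u;                // new: row-style indexing
  assert(flat.hit(1, 3) == rows.hit(1, 3));   // both address the same logical hit slot
  return 0;
}

The row-style form is what the new addQuintupletToMemory, addTripletToMemory, and the standalone ntuple code use throughout, which is why the Params_T5::kHits and Params_T3::kLayers multipliers disappear from the call sites.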