diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h index 00a2c83a1ce29..8fe8d99aa1b29 100644 --- a/RecoTracker/LSTCore/interface/Constants.h +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -75,6 +75,17 @@ namespace lst { typedef float FPX; #endif +// Needed for files that are compiled by g++ to not throw an error. +// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. +#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; +#endif + // Defining the constant host device variables right up here // Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future. struct Params_pLS { @@ -82,6 +93,7 @@ namespace lst { }; struct Params_LS { static constexpr int kLayers = 2, kHits = 4; + using ArrayUxLayers = edm::StdArray; }; struct Params_T3 { static constexpr int kLayers = 3, kHits = 6; diff --git a/RecoTracker/LSTCore/interface/SegmentsSoA.h b/RecoTracker/LSTCore/interface/SegmentsSoA.h new file mode 100644 index 0000000000000..40a75bbbee3ea --- /dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsSoA.h @@ -0,0 +1,67 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsSoA_h +#define RecoTracker_LSTCore_interface_SegmentsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(SegmentsSoALayout, + SOA_COLUMN(FPX, dPhis), + SOA_COLUMN(FPX, dPhiMins), + SOA_COLUMN(FPX, dPhiMaxs), + SOA_COLUMN(FPX, dPhiChanges), + SOA_COLUMN(FPX, dPhiChangeMins), + SOA_COLUMN(FPX, dPhiChangeMaxs), + SOA_COLUMN(uint16_t, innerLowerModuleIndices), + SOA_COLUMN(uint16_t, outerLowerModuleIndices), + SOA_COLUMN(Params_LS::ArrayUxLayers, mdIndices), + SOA_COLUMN(unsigned int, innerMiniDoubletAnchorHitIndices), + SOA_COLUMN(unsigned int, outerMiniDoubletAnchorHitIndices) + //SOA_SCALAR(unsigned int, nMemoryLocations) + ) + + GENERATE_SOA_LAYOUT(SegmentsOccupancySoALayout, + SOA_COLUMN(unsigned int, nSegments), //number of segments per inner lower module + SOA_COLUMN(unsigned int, totOccupancySegments)) + + GENERATE_SOA_LAYOUT(SegmentsPixelSoALayout, + SOA_COLUMN(unsigned int, seedIdx), + SOA_COLUMN(int, charge), + SOA_COLUMN(int, superbin), + SOA_COLUMN(uint4, pLSHitsIdxs), + SOA_COLUMN(PixelType, pixelType), + SOA_COLUMN(char, isQuad), + SOA_COLUMN(char, isDup), + SOA_COLUMN(bool, partOfPT5), + SOA_COLUMN(float, ptIn), + SOA_COLUMN(float, ptErr), + SOA_COLUMN(float, px), + SOA_COLUMN(float, py), + SOA_COLUMN(float, pz), + SOA_COLUMN(float, etaErr), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, phi), + SOA_COLUMN(float, score), + SOA_COLUMN(float, circleCenterX), + SOA_COLUMN(float, circleCenterY), + SOA_COLUMN(float, circleRadius)) + + using SegmentsSoA = SegmentsSoALayout<>; + using SegmentsOccupancySoA = SegmentsOccupancySoALayout<>; + using SegmentsPixelSoA = SegmentsPixelSoALayout<>; + + using Segments = SegmentsSoA::View; + using SegmentsConst = SegmentsSoA::ConstView; + using SegmentsOccupancy = SegmentsOccupancySoA::View; + using SegmentsOccupancyConst = SegmentsOccupancySoA::ConstView; + using SegmentsPixel = SegmentsPixelSoA::View; + using SegmentsPixelConst = SegmentsPixelSoA::ConstView; + + using SegmentsHostCollection = PortableHostMultiCollection; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 4477c5232608b..208f49cc52538 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -9,17 +9,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); -// Needed for files that are compiled by g++ to not throw an error. -// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. -#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) - struct uint4 { - unsigned int x; - unsigned int y; - unsigned int z; - unsigned int w; - }; -#endif - // Adjust grid and block sizes based on backend configuration template > ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, diff --git a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h index 39d3a7bc2f6f8..3011e1d2f87b7 100644 --- a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h @@ -1,5 +1,5 @@ -#ifndef RecoTracker_LSTCore_interface_alpaka_MiniDoubletsSoA_h -#define RecoTracker_LSTCore_interface_alpaka_MiniDoubletsSoA_h +#ifndef RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h #include "DataFormats/Portable/interface/alpaka/PortableCollection.h" diff --git a/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h new file mode 100644 index 0000000000000..ac634aa51bade --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using SegmentsDeviceCollection = PortableCollection3; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 3ec637da03420..986271225b4a5 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -56,8 +56,7 @@ void Event::resetEventSync() { miniDoubletsDC_.reset(); rangesInGPU_.reset(); rangesBuffers_.reset(); - segmentsInGPU_.reset(); - segmentsBuffers_.reset(); + segmentsDC_.reset(); tripletsInGPU_.reset(); tripletsBuffers_.reset(); quintupletsInGPU_.reset(); @@ -71,7 +70,7 @@ void Event::resetEventSync() { hitsInCPU_.reset(); rangesInCPU_.reset(); miniDoubletsHC_.reset(); - segmentsInCPU_.reset(); + segmentsHC_.reset(); tripletsInCPU_.reset(); quintupletsInCPU_.reset(); pixelTripletsInCPU_.reset(); @@ -203,7 +202,7 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, alpaka::memset(queue_, nMDs_view, 0u); alpaka::memset(queue_, totOccupancyMDs_view, 0u); } - if (!segmentsInGPU_) { + if (!segmentsDC_) { // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them @@ -223,11 +222,17 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, nTotalSegments_ += n_max_pixel_segments_per_module; - segmentsInGPU_.emplace(); - segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); - segmentsInGPU_->setData(*segmentsBuffers_); - - alpaka::memcpy(queue_, segmentsBuffers_->nMemoryLocations_buf, nTotalSegments_view); + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto nSegments_view = + alpaka::createView(devAcc_, segmentsDC_->view().nSegments(), nLowerModules_ + 1); + auto totOccupancySegments_view = alpaka::createView( + devAcc_, segmentsDC_->view().totOccupancySegments(), nLowerModules_ + 1); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); } auto hitIndices0_dev = allocBufWrapper(devAcc_, size, queue_); @@ -242,29 +247,33 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size); alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size); - alpaka::memcpy(queue_, segmentsBuffers_->ptIn_buf, ptIn, size); - alpaka::memcpy(queue_, segmentsBuffers_->ptErr_buf, ptErr, size); - alpaka::memcpy(queue_, segmentsBuffers_->px_buf, px, size); - alpaka::memcpy(queue_, segmentsBuffers_->py_buf, py, size); - alpaka::memcpy(queue_, segmentsBuffers_->pz_buf, pz, size); - alpaka::memcpy(queue_, segmentsBuffers_->etaErr_buf, etaErr, size); - alpaka::memcpy(queue_, segmentsBuffers_->isQuad_buf, isQuad, size); - alpaka::memcpy(queue_, segmentsBuffers_->eta_buf, eta, size); - alpaka::memcpy(queue_, segmentsBuffers_->phi_buf, phi, size); - alpaka::memcpy(queue_, segmentsBuffers_->charge_buf, charge, size); - alpaka::memcpy(queue_, segmentsBuffers_->seedIdx_buf, seedIdx, size); - alpaka::memcpy(queue_, segmentsBuffers_->superbin_buf, superbin, size); - alpaka::memcpy(queue_, segmentsBuffers_->pixelType_buf, pixelType, size); + SegmentsPixel segmentsPixel = segmentsDC_->view(); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.ptIn(), size), ptIn, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.ptErr(), size), ptErr, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.px(), size), px, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.py(), size), py, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.pz(), size), pz, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.etaErr(), size), etaErr, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.isQuad(), size), isQuad, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.eta(), size), eta, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.phi(), size), phi, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.charge(), size), charge, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.seedIdx(), size), seedIdx, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.superbin(), size), superbin, size); + alpaka::memcpy(queue_, alpaka::createView(devAcc_, segmentsPixel.pixelType(), size), pixelType, size); // Create source views for size and mdSize auto src_view_size = alpaka::createView(cms::alpakatools::host(), &size, (Idx)1u); auto src_view_mdSize = alpaka::createView(cms::alpakatools::host(), &mdSize, (Idx)1u); - auto dst_view_segments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), (Idx)nLowerModules_ + 1); + auto dst_view_segments = alpaka::createSubView(nSegments_view, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue_, dst_view_segments, src_view_size); - auto dst_view_totOccupancySegments = - alpaka::createSubView(segmentsBuffers_->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + auto totOccupancySegments_view = + alpaka::createView(devAcc_, segmentsOccupancy.totOccupancySegments(), (Idx)nLowerModules_ + 1); + auto dst_view_totOccupancySegments = alpaka::createSubView(totOccupancySegments_view, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size); auto mdsOccupancy = miniDoubletsDC_->view(); @@ -290,7 +299,8 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, *rangesInGPU_, *hitsInGPU_, miniDoubletsDC_->view(), - *segmentsInGPU_, + segmentsDC_->view(), + segmentsDC_->view(), hitIndices0_dev.data(), hitIndices1_dev.data(), hitIndices2_dev.data(), @@ -365,10 +375,18 @@ void Event::createMiniDoublets() { } void Event::createSegmentsWithModuleMap() { - if (!segmentsInGPU_) { - segmentsInGPU_.emplace(); - segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); - segmentsInGPU_->setData(*segmentsBuffers_); + if (!segmentsDC_) { + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto nSegments_view = + alpaka::createView(devAcc_, segmentsDC_->view().nSegments(), nLowerModules_ + 1); + auto totOccupancySegments_view = alpaka::createView( + devAcc_, segmentsDC_->view().totOccupancySegments(), nLowerModules_ + 1); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; @@ -382,7 +400,8 @@ void Event::createSegmentsWithModuleMap() { *modulesBuffers_.data(), miniDoubletsDC_->const_view(), miniDoubletsDC_->const_view(), - *segmentsInGPU_, + segmentsDC_->view(), + segmentsDC_->view(), *rangesInGPU_); WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); @@ -391,7 +410,7 @@ void Event::createSegmentsWithModuleMap() { addSegmentRangesToEventExplicit_workDiv, AddSegmentRangesToEventExplicit{}, *modulesBuffers_.data(), - *segmentsInGPU_, + segmentsDC_->view(), *rangesInGPU_); if (addObjects_) { @@ -408,7 +427,7 @@ void Event::createTriplets() { CreateTripletArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU_, - *segmentsInGPU_); + segmentsDC_->const_view()); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); @@ -428,7 +447,9 @@ void Event::createTriplets() { // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); - alpaka::memcpy(queue_, nSegments_buf_h, segmentsBuffers_->nSegments_buf, nLowerModules_); + auto nSegments_buf_d = + alpaka::createView(devAcc_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegments_buf_h, nSegments_buf_d, nLowerModules_); // ... same for module_nConnectedModules // FIXME: replace by ES host data @@ -468,7 +489,8 @@ void Event::createTriplets() { CreateTripletsInGPUv2{}, *modulesBuffers_.data(), miniDoubletsDC_->const_view(), - *segmentsInGPU_, + segmentsDC_->const_view(), + segmentsDC_->const_view(), *tripletsInGPU_, *rangesInGPU_, index_gpu_buf.data(), @@ -506,7 +528,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { *modulesBuffers_.data(), *rangesInGPU_, *pixelTripletsInGPU_, - *segmentsInGPU_, + segmentsDC_->const_view(), *pixelQuintupletsInGPU_); WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); @@ -517,7 +539,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { nLowerModules_, *pixelTripletsInGPU_, trackCandidatesDC_->view(), - *segmentsInGPU_, + segmentsDC_->const_view(), *rangesInGPU_); // Pull nEligibleT5Modules from the device. @@ -570,7 +592,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, true); + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + *modulesBuffers_.data(), + segmentsDC_->const_view(), + segmentsDC_->view(), + true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; @@ -585,7 +613,9 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { *rangesInGPU_, *pixelTripletsInGPU_, trackCandidatesDC_->view(), - *segmentsInGPU_, + segmentsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), miniDoubletsDC_->const_view(), *hitsInGPU_, *quintupletsInGPU_); @@ -600,7 +630,8 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { AddpLSasTrackCandidateInGPU{}, nLowerModules_, trackCandidatesDC_->view(), - *segmentsInGPU_, + segmentsDC_->const_view(), + segmentsDC_->const_view(), tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached @@ -639,12 +670,16 @@ void Event::createPixelTriplets() { pixelTripletsBuffers_.emplace(n_max_pixel_triplets, devAcc_, queue_); pixelTripletsInGPU_->setData(*pixelTripletsBuffers_); } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); - alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); - alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); + alpaka::memcpy( + queue_, superbins_buf, alpaka::createView(devAcc_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, pixelTypes_buf, alpaka::createView(devAcc_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); auto const* superbins = superbins_buf.data(); auto const* pixelTypes = pixelTypes_buf.data(); @@ -652,7 +687,9 @@ void Event::createPixelTriplets() { auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); // Create a sub-view for the device buffer - auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); + unsigned int totalModules = nLowerModules_ + 1; + auto dev_view_nSegments_buf = alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), totalModules); + auto dev_view_nSegments = alpaka::createSubView(dev_view_nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using @@ -722,7 +759,8 @@ void Event::createPixelTriplets() { *modulesBuffers_.data(), *rangesInGPU_, miniDoubletsDC_->const_view(), - *segmentsInGPU_, + segmentsDC_->const_view(), + segmentsDC_->const_view(), *tripletsInGPU_, *pixelTripletsInGPU_, connectedPixelSize_dev_buf.data(), @@ -787,7 +825,7 @@ void Event::createQuintuplets() { CreateQuintupletsInGPUv2{}, *modulesBuffers_.data(), miniDoubletsDC_->const_view(), - *segmentsInGPU_, + segmentsDC_->const_view(), *tripletsInGPU_, *quintupletsInGPU_, *rangesInGPU_, @@ -826,7 +864,13 @@ void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, false); + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + *modulesBuffers_.data(), + segmentsDC_->const_view(), + segmentsDC_->view(), + false); } } @@ -841,12 +885,16 @@ void Event::createPixelQuintuplets() { auto buf = trackCandidatesDC_->buffer(); alpaka::memset(queue_, buf, 0u); } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); - alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); - alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); + alpaka::memcpy( + queue_, superbins_buf, alpaka::createView(devAcc_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, pixelTypes_buf, alpaka::createView(devAcc_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); auto const* superbins = superbins_buf.data(); auto const* pixelTypes = pixelTypes_buf.data(); @@ -854,7 +902,9 @@ void Event::createPixelQuintuplets() { auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); // Create a sub-view for the device buffer - auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); + unsigned int totalModules = nLowerModules_ + 1; + auto dev_view_nSegments_buf = alpaka::createView(devAcc_, segmentsOccupancy.nSegments(), totalModules); + auto dev_view_nSegments = alpaka::createSubView(dev_view_nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using @@ -922,7 +972,8 @@ void Event::createPixelQuintuplets() { CreatePixelQuintupletsInGPUFromMapv2{}, *modulesBuffers_.data(), miniDoubletsDC_->const_view(), - *segmentsInGPU_, + segmentsDC_->const_view(), + segmentsDC_->view(), *tripletsInGPU_, *quintupletsInGPU_, *pixelQuintupletsInGPU_, @@ -949,7 +1000,7 @@ void Event::createPixelQuintuplets() { nLowerModules_, *pixelQuintupletsInGPU_, trackCandidatesDC_->view(), - *segmentsInGPU_, + segmentsDC_->const_view(), *rangesInGPU_); #ifdef WARNINGS @@ -998,7 +1049,9 @@ void Event::addMiniDoubletsToEventExplicit() { void Event::addSegmentsToEventExplicit() { auto nSegmentsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); - alpaka::memcpy(queue_, nSegmentsCPU_buf, segmentsBuffers_->nSegments_buf, nLowerModules_); + auto nSegments_buf = + alpaka::createView(devAcc_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegmentsCPU_buf, nSegments_buf, nLowerModules_); // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); @@ -1368,41 +1421,25 @@ typename TSoA::ConstView Event::getMiniDoublets(bool sync) { template MiniDoubletsConst Event::getMiniDoublets(bool); template MiniDoubletsOccupancyConst Event::getMiniDoublets(bool); -SegmentsBuffer& Event::getSegments(bool sync) { - if (!segmentsInCPU_) { - // Get nMemoryLocations parameter to initialize host based segmentsInCPU_ - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue_, nMemHost_buf_h, segmentsBuffers_->nMemoryLocations_buf); - alpaka::wait(queue_); // wait for the value before using - - auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU_.emplace(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, cms::alpakatools::host(), queue_); - segmentsInCPU_->setData(*segmentsInCPU_); - - alpaka::memcpy(queue_, segmentsInCPU_->nMemoryLocations_buf, segmentsBuffers_->nMemoryLocations_buf); - alpaka::memcpy(queue_, segmentsInCPU_->nSegments_buf, segmentsBuffers_->nSegments_buf); - alpaka::memcpy(queue_, segmentsInCPU_->mdIndices_buf, segmentsBuffers_->mdIndices_buf, 2u * nMemHost); - alpaka::memcpy(queue_, - segmentsInCPU_->innerMiniDoubletAnchorHitIndices_buf, - segmentsBuffers_->innerMiniDoubletAnchorHitIndices_buf, - nMemHost); - alpaka::memcpy(queue_, - segmentsInCPU_->outerMiniDoubletAnchorHitIndices_buf, - segmentsBuffers_->outerMiniDoubletAnchorHitIndices_buf, - nMemHost); - alpaka::memcpy(queue_, segmentsInCPU_->totOccupancySegments_buf, segmentsBuffers_->totOccupancySegments_buf); - alpaka::memcpy(queue_, segmentsInCPU_->ptIn_buf, segmentsBuffers_->ptIn_buf); - alpaka::memcpy(queue_, segmentsInCPU_->eta_buf, segmentsBuffers_->eta_buf); - alpaka::memcpy(queue_, segmentsInCPU_->phi_buf, segmentsBuffers_->phi_buf); - alpaka::memcpy(queue_, segmentsInCPU_->seedIdx_buf, segmentsBuffers_->seedIdx_buf); - alpaka::memcpy(queue_, segmentsInCPU_->isDup_buf, segmentsBuffers_->isDup_buf); - alpaka::memcpy(queue_, segmentsInCPU_->isQuad_buf, segmentsBuffers_->isQuad_buf); - alpaka::memcpy(queue_, segmentsInCPU_->score_buf, segmentsBuffers_->score_buf); - if (sync) - alpaka::wait(queue_); // host consumers expect filled data +template +typename TSoA::ConstView Event::getSegments(bool sync) { + if constexpr (std::is_same_v) { + return segmentsDC_->const_view(); + } else { + if (!segmentsHC_) { + segmentsHC_.emplace( + cms::alpakatools:: + CopyToHost>::copyAsync( + queue_, *segmentsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return segmentsHC_->const_view(); } - return segmentsInCPU_.value(); } +template SegmentsConst Event::getSegments(bool); +template SegmentsOccupancyConst Event::getSegments(bool); +template SegmentsPixelConst Event::getSegments(bool); TripletsBuffer& Event::getTriplets(bool sync) { if (!tripletsInCPU_) { diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 4e8746f52c876..a3c3a21f09e2c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -48,8 +48,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional hitsInGPU_; std::optional> hitsBuffers_; std::optional miniDoubletsDC_; - std::optional segmentsInGPU_; - std::optional> segmentsBuffers_; + std::optional segmentsDC_; std::optional tripletsInGPU_; std::optional> tripletsBuffers_; std::optional quintupletsInGPU_; @@ -64,7 +63,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { std::optional> rangesInCPU_; std::optional> hitsInCPU_; std::optional miniDoubletsHC_; - std::optional> segmentsInCPU_; + std::optional segmentsHC_; std::optional> tripletsInCPU_; std::optional trackCandidatesHC_; std::optional> modulesInCPU_; @@ -184,7 +183,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ObjectRangesBuffer& getRanges(bool sync = true); template typename TSoA::ConstView getMiniDoublets(bool sync = true); - SegmentsBuffer& getSegments(bool sync = true); + template + typename TSoA::ConstView getSegments(bool sync = true); TripletsBuffer& getTriplets(bool sync = true); QuintupletsBuffer& getQuintuplets(bool sync = true); PixelTripletsBuffer& getPixelTriplets(bool sync = true); diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index bc284d052cc05..b4fecca8f90cf 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -30,10 +30,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(SegmentsPixel segmentsPixel, unsigned int pixelSegmentArrayIndex, bool secondpass = false) { - segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; + segmentsPixel.isDup()[pixelSegmentArrayIndex] |= 1 + secondpass; } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, @@ -331,40 +331,44 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct CheckHitspLS { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Segments segmentsInGPU, bool secondpass) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + bool secondpass) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); int pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int nPixelSegments = segmentsInGPU.nSegments[pixelModuleIndex]; + unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex]; if (nPixelSegments > n_max_pixel_segments_per_module) nPixelSegments = n_max_pixel_segments_per_module; for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) { - if (secondpass && (!segmentsInGPU.isQuad[ix] || (segmentsInGPU.isDup[ix] & 1))) + if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1))) continue; unsigned int phits1[Params_pLS::kHits]; - phits1[0] = segmentsInGPU.pLSHitsIdxs[ix].x; - phits1[1] = segmentsInGPU.pLSHitsIdxs[ix].y; - phits1[2] = segmentsInGPU.pLSHitsIdxs[ix].z; - phits1[3] = segmentsInGPU.pLSHitsIdxs[ix].w; - float eta_pix1 = segmentsInGPU.eta[ix]; - float phi_pix1 = segmentsInGPU.phi[ix]; + phits1[0] = segmentsPixel.pLSHitsIdxs()[ix].x; + phits1[1] = segmentsPixel.pLSHitsIdxs()[ix].y; + phits1[2] = segmentsPixel.pLSHitsIdxs()[ix].z; + phits1[3] = segmentsPixel.pLSHitsIdxs()[ix].w; + float eta_pix1 = segmentsPixel.eta()[ix]; + float phi_pix1 = segmentsPixel.phi()[ix]; for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) { - float eta_pix2 = segmentsInGPU.eta[jx]; - float phi_pix2 = segmentsInGPU.phi[jx]; + float eta_pix2 = segmentsPixel.eta()[jx]; + float phi_pix2 = segmentsPixel.phi()[jx]; if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f) continue; - if (secondpass && (!segmentsInGPU.isQuad[jx] || (segmentsInGPU.isDup[jx] & 1))) + if (secondpass && (!segmentsPixel.isQuad()[jx] || (segmentsPixel.isDup()[jx] & 1))) continue; - int8_t quad_diff = segmentsInGPU.isQuad[ix] - segmentsInGPU.isQuad[jx]; - float score_diff = segmentsInGPU.score[ix] - segmentsInGPU.score[jx]; + int8_t quad_diff = segmentsPixel.isQuad()[ix] - segmentsPixel.isQuad()[jx]; + float score_diff = segmentsPixel.score()[ix] - segmentsPixel.score()[jx]; // Always keep quads over trips. If they are the same, we want the object with better score int idxToRemove; if (quad_diff > 0) @@ -379,10 +383,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { idxToRemove = ix; unsigned int phits2[Params_pLS::kHits]; - phits2[0] = segmentsInGPU.pLSHitsIdxs[jx].x; - phits2[1] = segmentsInGPU.pLSHitsIdxs[jx].y; - phits2[2] = segmentsInGPU.pLSHitsIdxs[jx].z; - phits2[3] = segmentsInGPU.pLSHitsIdxs[jx].w; + phits2[0] = segmentsPixel.pLSHitsIdxs()[jx].x; + phits2[1] = segmentsPixel.pLSHitsIdxs()[jx].y; + phits2[2] = segmentsPixel.pLSHitsIdxs()[jx].z; + phits2[3] = segmentsPixel.pLSHitsIdxs()[jx].w; int npMatched = 0; for (int i = 0; i < Params_pLS::kHits; i++) { @@ -402,7 +406,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } const int minNHitsForDup_pLS = 3; if (npMatched >= minNHitsForDup_pLS) { - rmPixelSegmentFromMemory(segmentsInGPU, idxToRemove, secondpass); + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); } if (secondpass) { float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); @@ -410,7 +414,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if ((npMatched >= 1) || (dR2 < 1e-5f)) { - rmPixelSegmentFromMemory(segmentsInGPU, idxToRemove, secondpass); + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); } } } diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index 72e273c970331..6a125b96070cb 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -18,7 +18,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, Triplets const& tripletsInGPU, const float* xVec, const float* yVec, @@ -59,7 +59,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { layer2_adjustment = 1; // get upper segment to be in second layer } unsigned int md_idx_for_t5_eta_phi = - segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; + segments.mdIndices()[tripletsInGPU.segmentIndices[2 * innerTripletIndex]][layer2_adjustment]; bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index ebeb301cb230e..d33022cd112b1 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -108,7 +108,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, Quintuplets const& quintupletsInGPU, PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelIndex, @@ -150,9 +150,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 4]; pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex] = - segmentsInGPU.innerLowerModuleIndices[pixelIndex]; + segments.innerLowerModuleIndices()[pixelIndex]; pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 1] = - segmentsInGPU.outerLowerModuleIndices[pixelIndex]; + segments.outerLowerModuleIndices()[pixelIndex]; pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 2] = quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers]; pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 3] = @@ -164,8 +164,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 6] = quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 4]; - unsigned int pixelInnerMD = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelIndex]; - unsigned int pixelOuterMD = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelIndex + 1]; + unsigned int pixelInnerMD = segments.mdIndices()[pixelIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelIndex][1]; pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex] = mds.anchorHitIndices()[pixelInnerMD]; pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 1] = @@ -675,7 +675,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules const& modulesInGPU, ObjectRanges const& rangesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, Triplets const& tripletsInGPU, Quintuplets const& quintupletsInGPU, unsigned int pixelSegmentIndex, @@ -698,7 +699,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, tripletsInGPU, pixelSegmentIndex, T5InnerT3Index, @@ -717,13 +719,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index]; unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index + 1]; - unsigned int pixelInnerMDIndex = segmentsInGPU.mdIndices[2 * pixelSegmentIndex]; - unsigned int pixelOuterMDIndex = segmentsInGPU.mdIndices[2 * pixelSegmentIndex + 1]; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; - unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; uint16_t lowerModuleIndex1 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex]; uint16_t lowerModuleIndex2 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1]; @@ -773,9 +775,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorY()[fifthMDIndex]}; //get the appropriate radii and centers - centerX = segmentsInGPU.circleCenterX[pixelSegmentArrayIndex]; - centerY = segmentsInGPU.circleCenterY[pixelSegmentArrayIndex]; - pixelRadius = segmentsInGPU.circleRadius[pixelSegmentArrayIndex]; + centerX = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex]; + centerY = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; + pixelRadius = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; float T5CenterX = quintupletsInGPU.regressionG[quintupletIndex]; float T5CenterY = quintupletsInGPU.regressionF[quintupletIndex]; @@ -821,7 +823,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, MiniDoubletsConst mds, - Segments segmentsInGPU, + SegmentsConst segments, + SegmentsPixel segmentsPixel, Triplets tripletsInGPU, Quintuplets quintupletsInGPU, PixelQuintuplets pixelQuintupletsInGPU, @@ -845,7 +848,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == TwoS) continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; - if (segmentsInGPU.isDup[i_pLS]) + if (segmentsPixel.isDup()[i_pLS]) continue; unsigned int nOuterQuintuplets = quintupletsInGPU.nQuintuplets[quintupletLowerModuleIndex]; @@ -870,7 +873,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, tripletsInGPU, quintupletsInGPU, pixelSegmentIndex, @@ -898,7 +902,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { addPixelQuintupletToMemory(modulesInGPU, mds, - segmentsInGPU, + segments, quintupletsInGPU, pixelQuintupletsInGPU, pixelSegmentIndex, @@ -917,7 +921,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; - segmentsInGPU.partOfPT5[i_pLS] = true; + segmentsPixel.partOfPT5()[i_pLS] = true; quintupletsInGPU.partOfPT5[quintupletIndex] = true; } // tot occupancy } // end success diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 661dbe2250001..1401aefdf797d 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -130,7 +130,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { }; ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, Triplets const& tripletsInGPU, PixelTriplets& pixelTripletsInGPU, unsigned int pixelSegmentIndex, @@ -173,9 +173,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2]; pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex] = - segmentsInGPU.innerLowerModuleIndices[pixelSegmentIndex]; + segments.innerLowerModuleIndices()[pixelSegmentIndex]; pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 1] = - segmentsInGPU.outerLowerModuleIndices[pixelSegmentIndex]; + segments.outerLowerModuleIndices()[pixelSegmentIndex]; pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 2] = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 3] = @@ -183,8 +183,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 4] = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2]; - unsigned int pixelInnerMD = segmentsInGPU.mdIndices[2 * pixelSegmentIndex]; - unsigned int pixelOuterMD = segmentsInGPU.mdIndices[2 * pixelSegmentIndex + 1]; + unsigned int pixelInnerMD = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelSegmentIndex][1]; pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex] = mds.anchorHitIndices()[pixelInnerMD]; pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 1] = mds.outerHitIndices()[pixelInnerMD]; @@ -213,7 +213,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules const& modulesInGPU, ObjectRanges const& rangesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, uint16_t pixelLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -222,11 +223,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * innerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * innerSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex + 1]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1]; if (outerInnerLowerModuleSubdet == Barrel and (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { @@ -234,7 +235,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, pixelLowerModuleIndex, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, @@ -249,7 +251,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, pixelLowerModuleIndex, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, @@ -766,7 +769,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules const& modulesInGPU, ObjectRanges const& rangesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, Triplets const& tripletsInGPU, unsigned int pixelSegmentIndex, unsigned int tripletIndex, @@ -779,7 +783,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& rPhiChiSquaredInwards, bool runChiSquaredCuts = true) { //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet - uint16_t pixelModuleIndex = segmentsInGPU.innerLowerModuleIndices[pixelSegmentIndex]; + uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex]; uint16_t lowerModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; uint16_t middleModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1]; @@ -791,7 +795,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, pixelModuleIndex, lowerModuleIndex, middleModuleIndex, @@ -804,7 +809,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, pixelModuleIndex, middleModuleIndex, upperModuleIndex, @@ -815,28 +821,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //pt matching between the pixel ptin and the triplet circle pt unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - float pixelSegmentPt = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; - float pixelSegmentPtError = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; - float pixelSegmentPx = segmentsInGPU.px[pixelSegmentArrayIndex]; - float pixelSegmentPy = segmentsInGPU.py[pixelSegmentArrayIndex]; - float pixelSegmentPz = segmentsInGPU.pz[pixelSegmentArrayIndex]; - int pixelSegmentCharge = segmentsInGPU.charge[pixelSegmentArrayIndex]; + float pixelSegmentPt = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; + float pixelSegmentPtError = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float pixelSegmentPx = segmentsPixel.px()[pixelSegmentArrayIndex]; + float pixelSegmentPy = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pixelSegmentPz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + int pixelSegmentCharge = segmentsPixel.charge()[pixelSegmentArrayIndex]; - float pixelG = segmentsInGPU.circleCenterX[pixelSegmentArrayIndex]; - float pixelF = segmentsInGPU.circleCenterY[pixelSegmentArrayIndex]; - float pixelRadiusPCA = segmentsInGPU.circleRadius[pixelSegmentArrayIndex]; + float pixelG = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex]; + float pixelF = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; + float pixelRadiusPCA = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; - unsigned int pixelInnerMDIndex = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelSegmentIndex]; - unsigned int pixelOuterMDIndex = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelSegmentIndex + 1]; + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; pixelRadius = pixelSegmentPt * kR1GeVf; float pixelRadiusError = pixelSegmentPtError * kR1GeVf; unsigned int tripletInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex]; unsigned int tripletOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex + 1]; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * tripletInnerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * tripletInnerSegmentIndex + 1]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * tripletOuterSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][1]; + unsigned int thirdMDIndex = segments.mdIndices()[tripletOuterSegmentIndex][1]; float xs[Params_T3::kLayers] = { mds.anchorX()[firstMDIndex], mds.anchorX()[secondMDIndex], mds.anchorX()[thirdMDIndex]}; @@ -922,7 +928,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules modulesInGPU, ObjectRanges rangesInGPU, MiniDoubletsConst mds, - Segments segmentsInGPU, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, Triplets tripletsInGPU, PixelTriplets pixelTripletsInGPU, unsigned int* connectedPixelSize, @@ -960,9 +967,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + i_pLS; - if (segmentsInGPU.isDup[i_pLS]) + if (segmentsPixel.isDup()[i_pLS]) continue; - if (segmentsInGPU.partOfPT5[i_pLS]) + if (segmentsPixel.partOfPT5()[i_pLS]) continue; //don't make pT3s for those pixels that are part of pT5 short layer2_adjustment; @@ -992,7 +999,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { modulesInGPU, rangesInGPU, mds, - segmentsInGPU, + segments, + segmentsPixel, tripletsInGPU, pixelSegmentIndex, outerTripletIndex, @@ -1005,15 +1013,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rPhiChiSquaredInwards); if (success) { - float phi = - mds.anchorPhi()[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * outerTripletIndex] + - layer2_adjustment]]; - float eta = - mds.anchorEta()[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * outerTripletIndex] + - layer2_adjustment]]; - float eta_pix = segmentsInGPU.eta[i_pLS]; - float phi_pix = segmentsInGPU.phi[i_pLS]; - float pt = segmentsInGPU.ptIn[i_pLS]; + float phi = mds.anchorPhi()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * outerTripletIndex]] + [layer2_adjustment]]; + float eta = mds.anchorEta()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * outerTripletIndex]] + [layer2_adjustment]]; + float eta_pix = segmentsPixel.eta()[i_pLS]; + float phi_pix = segmentsPixel.phi()[i_pLS]; + float pt = segmentsPixel.ptIn()[i_pLS]; float score = rPhiChiSquared + rPhiChiSquaredInwards; unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd( acc, pixelTripletsInGPU.totOccupancyPixelTriplets, 1u, alpaka::hierarchy::Threads{}); @@ -1025,7 +1031,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int pixelTripletIndex = alpaka::atomicAdd(acc, pixelTripletsInGPU.nPixelTriplets, 1u, alpaka::hierarchy::Threads{}); addPixelTripletToMemory(mds, - segmentsInGPU, + segments, tripletsInGPU, pixelTripletsInGPU, pixelSegmentIndex, @@ -1157,7 +1163,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules const& modulesInGPU, ObjectRanges const& rangesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1195,13 +1202,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - float ptIn = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; float ptSLo = ptIn; - float px = segmentsInGPU.px[pixelSegmentArrayIndex]; - float py = segmentsInGPU.py[pixelSegmentArrayIndex]; - float pz = segmentsInGPU.pz[pixelSegmentArrayIndex]; - float ptErr = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; - float etaErr = segmentsInGPU.etaErr[pixelSegmentArrayIndex]; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -1272,8 +1279,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //lots of array accesses below this... - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; @@ -1414,7 +1421,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules const& modulesInGPU, ObjectRanges const& rangesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1451,13 +1459,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; - float ptIn = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; float ptSLo = ptIn; - float px = segmentsInGPU.px[pixelSegmentArrayIndex]; - float py = segmentsInGPU.py[pixelSegmentArrayIndex]; - float pz = segmentsInGPU.pz[pixelSegmentArrayIndex]; - float ptErr = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; - float etaErr = segmentsInGPU.etaErr[pixelSegmentArrayIndex]; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -1533,8 +1541,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 5a2f9991a4f1f..1d506c11c3d63 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -751,15 +751,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(Triplets const& tripletsInGPU, - Segments const& segmentsInGPU, + SegmentsConst segments, unsigned int innerTripletIndex, unsigned int outerTripletIndex) { unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; unsigned int outerInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; unsigned int innerOuterOuterMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * innerOuterSegmentIndex + 1]; //inner triplet outer segment outer MD index + segments.mdIndices()[innerOuterSegmentIndex][1]; //inner triplet outer segment outer MD index unsigned int outerInnerInnerMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * outerInnerSegmentIndex]; //outer triplet inner segment inner MD index + segments.mdIndices()[outerInnerSegmentIndex][0]; //outer triplet inner segment inner MD index return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); } @@ -1343,7 +1343,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1430,9 +1430,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions - - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; @@ -1586,7 +1585,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1684,9 +1683,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); - float sdIn_alpha_min = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float sdIn_alpha_min = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alpha_max = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; float sdOut_alphaOut = phi_mpi_pi(acc, @@ -1696,9 +1695,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorPhi()[fourthMDIndex]); float sdOut_alphaOut_min = phi_mpi_pi( - acc, __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex])); + acc, __H2F(segments.dPhiChangeMins()[outerSegmentIndex]) - __H2F(segments.dPhiMins()[outerSegmentIndex])); float sdOut_alphaOut_max = phi_mpi_pi( - acc, __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex])); + acc, __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]) - __H2F(segments.dPhiMaxs()[outerSegmentIndex])); float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; @@ -1831,7 +1830,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1933,13 +1932,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; //weird float sdOut_dPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[thirdMDIndex]); - float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]); - float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]); + float sdOut_dPhiChange = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + float sdOut_dPhiChange_min = __H2F(segments.dPhiChangeMins()[outerSegmentIndex]); + float sdOut_dPhiChange_max = __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]); float sdOut_alphaOutRHmin = phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); @@ -1950,8 +1949,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); - float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; @@ -2056,7 +2055,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -2077,7 +2076,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2093,7 +2092,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2109,7 +2108,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2125,7 +2124,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2141,7 +2140,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return runQuintupletDefaultAlgoEEEE(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, innerOuterLowerModuleIndex, outerInnerLowerModuleIndex, @@ -2161,7 +2160,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, Modules& modulesInGPU, MiniDoubletsConst mds, - Segments& segmentsInGPU, + SegmentsConst segments, Triplets& tripletsInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, @@ -2186,24 +2185,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex + 1]; unsigned int innerOuterOuterMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; //inner triplet outer segment outer MD index + segments.mdIndices()[secondSegmentIndex][1]; //inner triplet outer segment outer MD index unsigned int outerInnerInnerMiniDoubletIndex = - segmentsInGPU.mdIndices[2 * thirdSegmentIndex]; //outer triplet inner segment inner MD index + segments.mdIndices()[thirdSegmentIndex][0]; //outer triplet inner segment inner MD index //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) return false; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; - unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; - unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; if (not runQuintupletAlgoSelector(acc, modulesInGPU, mds, - segmentsInGPU, + segments, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, @@ -2219,7 +2218,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (not runQuintupletAlgoSelector(acc, modulesInGPU, mds, - segmentsInGPU, + segments, lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex4, @@ -2432,7 +2431,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float inference = t5dnn::runInference(acc, modulesInGPU, mds, - segmentsInGPU, + segments, tripletsInGPU, xVec, yVec, @@ -2504,7 +2503,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, MiniDoubletsConst mds, - Segments segmentsInGPU, + SegmentsConst segments, Triplets tripletsInGPU, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU, @@ -2545,7 +2544,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bool success = runQuintupletDefaultAlgo(acc, modulesInGPU, mds, - segmentsInGPU, + segments, tripletsInGPU, lowerModule1, lowerModule2, @@ -2583,12 +2582,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } else { unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[lowerModule1] + quintupletModuleIndex; - float phi = - mds.anchorPhi()[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + - layer2_adjustment]]]; - float eta = - mds.anchorEta()[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + - layer2_adjustment]]]; + float phi = mds.anchorPhi()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * innerTripletIndex]] + [layer2_adjustment]]; + float eta = mds.anchorEta()[segments.mdIndices()[tripletsInGPU.segmentIndices[2 * innerTripletIndex]] + [layer2_adjustment]]; float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; float scores = chiSquared + nonAnchorChiSquared; addQuintupletToMemory(tripletsInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index c7cf3b449c2e9..b795edbcbeb36 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -4,6 +4,8 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" #include "RecoTracker/LSTCore/interface/Module.h" #include "RecoTracker/LSTCore/interface/EndcapGeometry.h" @@ -12,169 +14,6 @@ #include "ObjectRanges.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - struct Segments { - FPX* dPhis; - FPX* dPhiMins; - FPX* dPhiMaxs; - FPX* dPhiChanges; - FPX* dPhiChangeMins; - FPX* dPhiChangeMaxs; - uint16_t* innerLowerModuleIndices; - uint16_t* outerLowerModuleIndices; - unsigned int* seedIdx; - unsigned int* mdIndices; - unsigned int* nMemoryLocations; - unsigned int* innerMiniDoubletAnchorHitIndices; - unsigned int* outerMiniDoubletAnchorHitIndices; - int* charge; - int* superbin; - unsigned int* nSegments; //number of segments per inner lower module - unsigned int* totOccupancySegments; //number of segments per inner lower module - uint4* pLSHitsIdxs; - PixelType* pixelType; - char* isQuad; - char* isDup; - bool* partOfPT5; - float* ptIn; - float* ptErr; - float* px; - float* py; - float* pz; - float* etaErr; - float* eta; - float* phi; - float* score; - float* circleCenterX; - float* circleCenterY; - float* circleRadius; - - template - void setData(TBuff& buf) { - dPhis = buf.dPhis_buf.data(); - dPhiMins = buf.dPhiMins_buf.data(); - dPhiMaxs = buf.dPhiMaxs_buf.data(); - dPhiChanges = buf.dPhiChanges_buf.data(); - dPhiChangeMins = buf.dPhiChangeMins_buf.data(); - dPhiChangeMaxs = buf.dPhiChangeMaxs_buf.data(); - innerLowerModuleIndices = buf.innerLowerModuleIndices_buf.data(); - outerLowerModuleIndices = buf.outerLowerModuleIndices_buf.data(); - seedIdx = buf.seedIdx_buf.data(); - mdIndices = buf.mdIndices_buf.data(); - nMemoryLocations = buf.nMemoryLocations_buf.data(); - innerMiniDoubletAnchorHitIndices = buf.innerMiniDoubletAnchorHitIndices_buf.data(); - outerMiniDoubletAnchorHitIndices = buf.outerMiniDoubletAnchorHitIndices_buf.data(); - charge = buf.charge_buf.data(); - superbin = buf.superbin_buf.data(); - nSegments = buf.nSegments_buf.data(); - totOccupancySegments = buf.totOccupancySegments_buf.data(); - pLSHitsIdxs = buf.pLSHitsIdxs_buf.data(); - pixelType = buf.pixelType_buf.data(); - isQuad = buf.isQuad_buf.data(); - isDup = buf.isDup_buf.data(); - partOfPT5 = buf.partOfPT5_buf.data(); - ptIn = buf.ptIn_buf.data(); - ptErr = buf.ptErr_buf.data(); - px = buf.px_buf.data(); - py = buf.py_buf.data(); - pz = buf.pz_buf.data(); - etaErr = buf.etaErr_buf.data(); - eta = buf.eta_buf.data(); - phi = buf.phi_buf.data(); - score = buf.score_buf.data(); - circleCenterX = buf.circleCenterX_buf.data(); - circleCenterY = buf.circleCenterY_buf.data(); - circleRadius = buf.circleRadius_buf.data(); - } - }; - - template - struct SegmentsBuffer { - Buf dPhis_buf; - Buf dPhiMins_buf; - Buf dPhiMaxs_buf; - Buf dPhiChanges_buf; - Buf dPhiChangeMins_buf; - Buf dPhiChangeMaxs_buf; - Buf innerLowerModuleIndices_buf; - Buf outerLowerModuleIndices_buf; - Buf seedIdx_buf; - Buf mdIndices_buf; - Buf nMemoryLocations_buf; - Buf innerMiniDoubletAnchorHitIndices_buf; - Buf outerMiniDoubletAnchorHitIndices_buf; - Buf charge_buf; - Buf superbin_buf; - Buf nSegments_buf; - Buf totOccupancySegments_buf; - Buf pLSHitsIdxs_buf; - Buf pixelType_buf; - Buf isQuad_buf; - Buf isDup_buf; - Buf partOfPT5_buf; - Buf ptIn_buf; - Buf ptErr_buf; - Buf px_buf; - Buf py_buf; - Buf pz_buf; - Buf etaErr_buf; - Buf eta_buf; - Buf phi_buf; - Buf score_buf; - Buf circleCenterX_buf; - Buf circleCenterY_buf; - Buf circleRadius_buf; - - Segments data_; - - template - SegmentsBuffer(unsigned int nMemoryLocationsIn, - uint16_t nLowerModules, - unsigned int maxPixelSegments, - TDevAcc const& devAccIn, - TQueue& queue) - : dPhis_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiMins_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiMaxs_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiChanges_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiChangeMins_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - dPhiChangeMaxs_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - innerLowerModuleIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - outerLowerModuleIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - seedIdx_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - mdIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn * 2, queue)), - nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), - innerMiniDoubletAnchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - outerMiniDoubletAnchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), - charge_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - superbin_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - nSegments_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), - totOccupancySegments_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), - pLSHitsIdxs_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - pixelType_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - isQuad_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - isDup_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - partOfPT5_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - ptIn_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - ptErr_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - px_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - py_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - pz_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - etaErr_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - eta_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - phi_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - score_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - circleCenterX_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - circleCenterY_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), - circleRadius_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)) { - alpaka::memset(queue, nSegments_buf, 0u); - alpaka::memset(queue, totOccupancySegments_buf, 0u); - alpaka::memset(queue, partOfPT5_buf, false); - alpaka::memset(queue, pLSHitsIdxs_buf, 0u); - } - - inline Segments const* data() const { return &data_; } - inline void setData(SegmentsBuffer& buf) { data_.setData(buf); } - }; ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { @@ -355,7 +194,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments segments, unsigned int lowerMDIndex, unsigned int upperMDIndex, uint16_t innerLowerModuleIndex, @@ -369,24 +208,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dPhiChangeMin, float dPhiChangeMax, unsigned int idx) { - segmentsInGPU.mdIndices[idx * 2] = lowerMDIndex; - segmentsInGPU.mdIndices[idx * 2 + 1] = upperMDIndex; - segmentsInGPU.innerLowerModuleIndices[idx] = innerLowerModuleIndex; - segmentsInGPU.outerLowerModuleIndices[idx] = outerLowerModuleIndex; - segmentsInGPU.innerMiniDoubletAnchorHitIndices[idx] = innerMDAnchorHitIndex; - segmentsInGPU.outerMiniDoubletAnchorHitIndices[idx] = outerMDAnchorHitIndex; - - segmentsInGPU.dPhis[idx] = __F2H(dPhi); - segmentsInGPU.dPhiMins[idx] = __F2H(dPhiMin); - segmentsInGPU.dPhiMaxs[idx] = __F2H(dPhiMax); - segmentsInGPU.dPhiChanges[idx] = __F2H(dPhiChange); - segmentsInGPU.dPhiChangeMins[idx] = __F2H(dPhiChangeMin); - segmentsInGPU.dPhiChangeMaxs[idx] = __F2H(dPhiChangeMax); + segments.mdIndices()[idx][0] = lowerMDIndex; + segments.mdIndices()[idx][1] = upperMDIndex; + segments.innerLowerModuleIndices()[idx] = innerLowerModuleIndex; + segments.outerLowerModuleIndices()[idx] = outerLowerModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerMDAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerMDAnchorHitIndex; + + segments.dPhis()[idx] = __F2H(dPhi); + segments.dPhiMins()[idx] = __F2H(dPhiMin); + segments.dPhiMaxs()[idx] = __F2H(dPhiMax); + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + segments.dPhiChangeMins()[idx] = __F2H(dPhiChangeMin); + segments.dPhiChangeMaxs()[idx] = __F2H(dPhiChangeMax); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, - Segments& segmentsInGPU, + Segments segments, + SegmentsPixel segmentsPixel, MiniDoubletsConst mds, unsigned int innerMDIndex, unsigned int outerMDIndex, @@ -398,20 +238,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int idx, unsigned int pixelSegmentArrayIndex, float score) { - segmentsInGPU.mdIndices[idx * 2] = innerMDIndex; - segmentsInGPU.mdIndices[idx * 2 + 1] = outerMDIndex; - segmentsInGPU.innerLowerModuleIndices[idx] = pixelModuleIndex; - segmentsInGPU.outerLowerModuleIndices[idx] = pixelModuleIndex; - segmentsInGPU.innerMiniDoubletAnchorHitIndices[idx] = innerAnchorHitIndex; - segmentsInGPU.outerMiniDoubletAnchorHitIndices[idx] = outerAnchorHitIndex; - segmentsInGPU.dPhiChanges[idx] = __F2H(dPhiChange); - segmentsInGPU.isDup[pixelSegmentArrayIndex] = false; - segmentsInGPU.score[pixelSegmentArrayIndex] = score; - - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].x = hitIdxs[0]; - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].y = hitIdxs[1]; - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].z = hitIdxs[2]; - segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].w = hitIdxs[3]; + segments.mdIndices()[idx][0] = innerMDIndex; + segments.mdIndices()[idx][1] = outerMDIndex; + segments.innerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.outerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerAnchorHitIndex; + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + + segmentsPixel.isDup()[pixelSegmentArrayIndex] = false; + segmentsPixel.partOfPT5()[pixelSegmentArrayIndex] = false; + segmentsPixel.score()[pixelSegmentArrayIndex] = score; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].x = hitIdxs[0]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].y = hitIdxs[1]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].z = hitIdxs[2]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].w = hitIdxs[3]; //computing circle parameters /* @@ -441,9 +282,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bestIndex = i; } } - segmentsInGPU.circleCenterX[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; - segmentsInGPU.circleCenterY[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; - segmentsInGPU.circleRadius[pixelSegmentArrayIndex] = circleRadius; + segmentsPixel.circleCenterX()[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; + segmentsPixel.circleCenterY()[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; + segmentsPixel.circleRadius()[pixelSegmentArrayIndex] = circleRadius; } template @@ -702,7 +543,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules modulesInGPU, MiniDoubletsConst mds, MiniDoubletsOccupancyConst mdsOccupancy, - Segments segmentsInGPU, + Segments segments, + SegmentsOccupancy segmentsOccupancy, ObjectRanges rangesInGPU) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const blockThreadIdx = alpaka::getIdx(acc); @@ -758,18 +600,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dPhiChange, dPhiChangeMin, dPhiChangeMax)) { - unsigned int totOccupancySegments = alpaka::atomicAdd( - acc, &segmentsInGPU.totOccupancySegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int totOccupancySegments = + alpaka::atomicAdd(acc, + &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); if (static_cast(totOccupancySegments) >= rangesInGPU.segmentModuleOccupancy[innerLowerModuleIndex]) { #ifdef WARNINGS printf("Segment excess alert! Module index = %d\n", innerLowerModuleIndex); #endif } else { unsigned int segmentModuleIdx = alpaka::atomicAdd( - acc, &segmentsInGPU.nSegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + acc, &segmentsOccupancy.nSegments()[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); unsigned int segmentIdx = rangesInGPU.segmentModuleIndices[innerLowerModuleIndex] + segmentModuleIdx; - addSegmentToMemory(segmentsInGPU, + addSegmentToMemory(segments, innerMDIndex, outerMDIndex, innerLowerModuleIndex, @@ -899,7 +744,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, - Segments segmentsInGPU, + SegmentsOccupancyConst segmentsOccupancy, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); @@ -909,12 +754,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const gridThreadExtent = alpaka::getWorkDiv(acc); for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (segmentsInGPU.nSegments[i] == 0) { + if (segmentsOccupancy.nSegments()[i] == 0) { rangesInGPU.segmentRanges[i * 2] = -1; rangesInGPU.segmentRanges[i * 2 + 1] = -1; } else { rangesInGPU.segmentRanges[i * 2] = rangesInGPU.segmentModuleIndices[i]; - rangesInGPU.segmentRanges[i * 2 + 1] = rangesInGPU.segmentModuleIndices[i] + segmentsInGPU.nSegments[i] - 1; + rangesInGPU.segmentRanges[i * 2 + 1] = + rangesInGPU.segmentModuleIndices[i] + segmentsOccupancy.nSegments()[i] - 1; } } } @@ -927,7 +773,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ObjectRanges rangesInGPU, Hits hitsInGPU, MiniDoublets mds, - Segments segmentsInGPU, + Segments segments, + SegmentsPixel segmentsPixel, unsigned int* hitIndices0, unsigned int* hitIndices1, unsigned int* hitIndices2, @@ -990,7 +837,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { hits1[2] = hitsInGPU.idxs[mds.outerHitIndices()[innerMDIndex]]; hits1[3] = hitsInGPU.idxs[mds.outerHitIndices()[outerMDIndex]]; addPixelSegmentToMemory(acc, - segmentsInGPU, + segments, + segmentsPixel, mds, innerMDIndex, outerMDIndex, diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index cd267c57a91d0..5ff4b7ad478cf 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -73,19 +73,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits( - unsigned int ix, unsigned int jx, MiniDoubletsConst mds, Segments const& segmentsInGPU, Hits const& hitsInGPU) { + unsigned int ix, unsigned int jx, MiniDoubletsConst mds, SegmentsConst segments, Hits const& hitsInGPU) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; - phits1[0] = hitsInGPU.idxs[mds.anchorHitIndices()[segmentsInGPU.mdIndices[2 * ix]]]; - phits1[1] = hitsInGPU.idxs[mds.anchorHitIndices()[segmentsInGPU.mdIndices[2 * ix + 1]]]; - phits1[2] = hitsInGPU.idxs[mds.outerHitIndices()[segmentsInGPU.mdIndices[2 * ix]]]; - phits1[3] = hitsInGPU.idxs[mds.outerHitIndices()[segmentsInGPU.mdIndices[2 * ix + 1]]]; + phits1[0] = hitsInGPU.idxs[mds.anchorHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[1] = hitsInGPU.idxs[mds.anchorHitIndices()[segments.mdIndices()[ix][1]]]; + phits1[2] = hitsInGPU.idxs[mds.outerHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[3] = hitsInGPU.idxs[mds.outerHitIndices()[segments.mdIndices()[ix][1]]]; - phits2[0] = hitsInGPU.idxs[mds.anchorHitIndices()[segmentsInGPU.mdIndices[2 * jx]]]; - phits2[1] = hitsInGPU.idxs[mds.anchorHitIndices()[segmentsInGPU.mdIndices[2 * jx + 1]]]; - phits2[2] = hitsInGPU.idxs[mds.outerHitIndices()[segmentsInGPU.mdIndices[2 * jx]]]; - phits2[3] = hitsInGPU.idxs[mds.outerHitIndices()[segmentsInGPU.mdIndices[2 * jx + 1]]]; + phits2[0] = hitsInGPU.idxs[mds.anchorHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[1] = hitsInGPU.idxs[mds.anchorHitIndices()[segments.mdIndices()[jx][1]]]; + phits2[2] = hitsInGPU.idxs[mds.outerHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[3] = hitsInGPU.idxs[mds.outerHitIndices()[segments.mdIndices()[jx][1]]]; int npMatched = 0; @@ -115,7 +115,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { Modules modulesInGPU, ObjectRanges rangesInGPU, PixelTriplets pixelTripletsInGPU, - Segments segmentsInGPU, + SegmentsPixelConst segmentsPixel, PixelQuintuplets pixelQuintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -137,8 +137,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int pixelQuintupletIndex = globalThreadIdx[1]; pixelQuintupletIndex < nPixelQuintuplets; pixelQuintupletIndex += gridThreadExtent[1]) { unsigned int pLS_jx = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; - float eta2 = segmentsInGPU.eta[pLS_jx - prefix]; - float phi2 = segmentsInGPU.phi[pLS_jx - prefix]; + float eta2 = segmentsPixel.eta()[pLS_jx - prefix]; + float phi2 = segmentsPixel.phi()[pLS_jx - prefix]; float dEta = alpaka::math::abs(acc, (eta1 - eta2)); float dPhi = calculate_dPhi(phi1, phi2); @@ -212,7 +212,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ObjectRanges rangesInGPU, PixelTriplets pixelTripletsInGPU, TrackCandidates cands, - Segments segmentsInGPU, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, MiniDoubletsConst mds, Hits hitsInGPU, Quintuplets quintupletsInGPU) const { @@ -220,14 +222,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { auto const gridThreadExtent = alpaka::getWorkDiv(acc); int pixelModuleIndex = *modulesInGPU.nLowerModules; - unsigned int nPixels = segmentsInGPU.nSegments[pixelModuleIndex]; + unsigned int nPixels = segmentsOccupancy.nSegments()[pixelModuleIndex]; for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; pixelArrayIndex += gridThreadExtent[2]) { - if (!segmentsInGPU.isQuad[pixelArrayIndex] || segmentsInGPU.isDup[pixelArrayIndex]) + if (!segmentsPixel.isQuad()[pixelArrayIndex] || segmentsPixel.isDup()[pixelArrayIndex]) continue; - float eta1 = segmentsInGPU.eta[pixelArrayIndex]; - float phi1 = segmentsInGPU.phi[pixelArrayIndex]; + float eta1 = segmentsPixel.eta()[pixelArrayIndex]; + float phi1 = segmentsPixel.phi()[pixelArrayIndex]; unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; unsigned int nTrackCandidates = cands.nTrackCandidates(); @@ -245,14 +247,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } if (type == 5) // pT3 { int pLSIndex = pixelTripletsInGPU.pixelSegmentIndices[innerTrackletIdx]; - int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segmentsInGPU, hitsInGPU); + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsInGPU); if (npMatched > 0) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; int pT3Index = innerTrackletIdx; float eta2 = __H2F(pixelTripletsInGPU.eta_pix[pT3Index]); @@ -262,24 +264,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } if (type == 7) // pT5 { unsigned int pLSIndex = innerTrackletIdx; - int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segmentsInGPU, hitsInGPU); + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsInGPU); if (npMatched > 0) { - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } - float eta2 = segmentsInGPU.eta[pLSIndex - prefix]; - float phi2 = segmentsInGPU.phi[pLSIndex - prefix]; + float eta2 = segmentsPixel.eta()[pLSIndex - prefix]; + float phi2 = segmentsPixel.phi()[pLSIndex - prefix]; float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) - segmentsInGPU.isDup[pixelArrayIndex] = true; + segmentsPixel.isDup()[pixelArrayIndex] = true; } } } @@ -292,7 +294,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t nLowerModules, PixelTriplets pixelTripletsInGPU, TrackCandidates cands, - Segments segmentsInGPU, + SegmentsPixelConst segmentsPixel, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); @@ -331,7 +333,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { &pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex], &pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex], &pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex], - segmentsInGPU.seedIdx[pT3PixelIndex - pLS_offset], + segmentsPixel.seedIdx()[pT3PixelIndex - pLS_offset], __H2F(pixelTripletsInGPU.centerX[pixelTripletIndex]), __H2F(pixelTripletsInGPU.centerY[pixelTripletIndex]), radius, @@ -400,15 +402,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, TrackCandidates cands, - Segments segmentsInGPU, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixelConst segmentsPixel, bool tc_pls_triplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - unsigned int nPixels = segmentsInGPU.nSegments[nLowerModules]; + unsigned int nPixels = segmentsOccupancy.nSegments()[nLowerModules]; for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; pixelArrayIndex += gridThreadExtent[2]) { - if ((tc_pls_triplets ? 0 : !segmentsInGPU.isQuad[pixelArrayIndex]) || (segmentsInGPU.isDup[pixelArrayIndex])) + if ((tc_pls_triplets ? 0 : !segmentsPixel.isQuad()[pixelArrayIndex]) || + (segmentsPixel.isDup()[pixelArrayIndex])) continue; unsigned int trackCandidateIdx = @@ -427,8 +431,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { addpLSTrackCandidateToMemory(cands, pixelArrayIndex, trackCandidateIdx, - segmentsInGPU.pLSHitsIdxs[pixelArrayIndex], - segmentsInGPU.seedIdx[pixelArrayIndex]); + segmentsPixel.pLSHitsIdxs()[pixelArrayIndex], + segmentsPixel.seedIdx()[pixelArrayIndex]); } } } @@ -440,7 +444,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t nLowerModules, PixelQuintuplets pixelQuintupletsInGPU, TrackCandidates cands, - Segments segmentsInGPU, + SegmentsPixelConst segmentsPixel, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); @@ -480,7 +484,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { &pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex], &pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex], &pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex], - segmentsInGPU.seedIdx[pT5PixelIndex - pLS_offset], + segmentsPixel.seedIdx()[pT5PixelIndex - pLS_offset], __H2F(pixelQuintupletsInGPU.centerX[pixelQuintupletIndex]), __H2F(pixelQuintupletsInGPU.centerY[pixelQuintupletIndex]), radius, diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 2aba1c8ecf855..3c8b4cddbe4ab 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -138,7 +138,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #ifdef CUT_VALUE_DEBUG ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, Triplets& tripletsInGPU, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, @@ -156,7 +156,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #else ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, Triplets& tripletsInGPU, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, @@ -187,9 +187,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2] = modulesInGPU.layers[outerOuterLowerModuleIndex] + (modulesInGPU.subdets[outerOuterLowerModuleIndex] == 4) * 6; //get the hits - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex + 1]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits] = mds.anchorHitIndices()[firstMDIndex]; tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 1] = mds.outerHitIndices()[firstMDIndex]; @@ -208,7 +208,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -268,7 +267,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -337,7 +336,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; // raw betaIn value without any correction, based on the mini-doublet hit positions - float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); @@ -363,7 +362,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -445,7 +444,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float rt_InLo = mds.anchorRt()[firstMDIndex]; float rt_InOut = mds.anchorRt()[secondMDIndex]; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; @@ -482,7 +481,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -565,15 +564,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float rt_InLo = mds.anchorRt()[firstMDIndex]; float rt_InOut = mds.anchorRt()[secondMDIndex]; - float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); - float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; @@ -603,7 +602,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -626,7 +625,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return passPointingConstraintBBB(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -643,7 +642,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return passPointingConstraintBBE(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -662,7 +661,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return passPointingConstraintBBE(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -684,7 +683,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return passPointingConstraintEEE(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -738,7 +737,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, Modules const& modulesInGPU, MiniDoubletsConst mds, - Segments const& segmentsInGPU, + SegmentsConst segments, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -752,17 +751,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& circleCenterX, float& circleCenterY) { //this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here! - if (segmentsInGPU.mdIndices[2 * innerSegmentIndex + 1] != segmentsInGPU.mdIndices[2 * outerSegmentIndex]) + if (segments.mdIndices()[innerSegmentIndex][1] != segments.mdIndices()[outerSegmentIndex][0]) return false; - unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex]; - unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex]; - unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex + 1]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; if (not(passRZConstraint(acc, modulesInGPU, mds, - segmentsInGPU, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -773,7 +771,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (not(passPointingConstraint(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -805,7 +803,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, MiniDoubletsConst mds, - Segments segmentsInGPU, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, Triplets tripletsInGPU, ObjectRanges rangesInGPU, uint16_t* index_gpu, @@ -823,29 +822,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nConnectedModules == 0) continue; - unsigned int nInnerSegments = segmentsInGPU.nSegments[innerInnerLowerModuleIndex]; + unsigned int nInnerSegments = segmentsOccupancy.nSegments()[innerInnerLowerModuleIndex]; for (unsigned int innerSegmentArrayIndex = globalThreadIdx[1]; innerSegmentArrayIndex < nInnerSegments; innerSegmentArrayIndex += gridThreadExtent[1]) { unsigned int innerSegmentIndex = rangesInGPU.segmentRanges[innerInnerLowerModuleIndex * 2] + innerSegmentArrayIndex; // middle lower module - outer lower module of inner segment - uint16_t middleLowerModuleIndex = segmentsInGPU.outerLowerModuleIndices[innerSegmentIndex]; + uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex]; - unsigned int nOuterSegments = segmentsInGPU.nSegments[middleLowerModuleIndex]; + unsigned int nOuterSegments = segmentsOccupancy.nSegments()[middleLowerModuleIndex]; for (unsigned int outerSegmentArrayIndex = globalThreadIdx[2]; outerSegmentArrayIndex < nOuterSegments; outerSegmentArrayIndex += gridThreadExtent[2]) { unsigned int outerSegmentIndex = rangesInGPU.segmentRanges[2 * middleLowerModuleIndex] + outerSegmentArrayIndex; - uint16_t outerOuterLowerModuleIndex = segmentsInGPU.outerLowerModuleIndices[outerSegmentIndex]; + uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex]; float zOut, rtOut, betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY; bool success = runTripletConstraintsAndAlgo(acc, modulesInGPU, mds, - segmentsInGPU, + segments, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, @@ -878,7 +877,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #ifdef CUT_VALUE_DEBUG addTripletToMemory(modulesInGPU, mds, - segmentsInGPU, + segments, tripletsInGPU, innerSegmentIndex, outerSegmentIndex, @@ -896,7 +895,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #else addTripletToMemory(modulesInGPU, mds, - segmentsInGPU, + segments, tripletsInGPU, innerSegmentIndex, outerSegmentIndex, @@ -922,7 +921,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU, - Segments segmentsInGPU) const { + SegmentsOccupancyConst segmentsOccupancy) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -941,7 +940,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { int occupancy, category_number, eta_number; for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { - if (segmentsInGPU.nSegments[i] == 0) { + if (segmentsOccupancy.nSegments()[i] == 0) { rangesInGPU.tripletModuleIndices[i] = nTotalTriplets; rangesInGPU.tripletModuleOccupancy[i] = 0; continue; diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index d0496e7067703..4c325a7be72e4 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -28,13 +28,13 @@ std::tuple, std::vector> convertHitsToHi //____________________________________________________________________________________________ std::vector getPixelHitsFrompLS(Event* event, unsigned int pLS) { - Segments const* segments = event->getSegments().data(); + SegmentsConst segments = event->getSegments(); MiniDoubletsConst miniDoublets = event->getMiniDoublets(); ObjectRanges const* rangesEvt = event->getRanges().data(); Modules const* modulesEvt = event->getModules().data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; - unsigned int MD_1 = segments->mdIndices[2 * (pLS + pLS_offset)]; - unsigned int MD_2 = segments->mdIndices[2 * (pLS + pLS_offset) + 1]; + unsigned int MD_1 = segments.mdIndices()[pLS + pLS_offset][0]; + unsigned int MD_2 = segments.mdIndices()[pLS + pLS_offset][1]; unsigned int hit_1 = miniDoublets.anchorHitIndices()[MD_1]; unsigned int hit_2 = miniDoublets.outerHitIndices()[MD_1]; unsigned int hit_3 = miniDoublets.anchorHitIndices()[MD_2]; @@ -92,9 +92,9 @@ std::tuple, std::vector> getHitIdxsAndHi //____________________________________________________________________________________________ std::vector getMDsFromLS(Event* event, unsigned int LS) { - Segments const* segments = event->getSegments().data(); - unsigned int MD_1 = segments->mdIndices[2 * LS]; - unsigned int MD_2 = segments->mdIndices[2 * LS + 1]; + SegmentsConst segments = event->getSegments(); + unsigned int MD_1 = segments.mdIndices()[LS][0]; + unsigned int MD_2 = segments.mdIndices()[LS][1]; return {MD_1, MD_2}; } diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index ef7b698c1afce..abcb324be0dc9 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -293,7 +293,7 @@ void setPixelQuintupletOutputBranches(Event* event) { // ============ pT5 ============= PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets().data(); Quintuplets const* quintuplets = event->getQuintuplets().data(); - Segments const* segments = event->getSegments().data(); + SegmentsPixelConst segmentsPixel = event->getSegments(); Modules const* modules = event->getModules().data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); @@ -305,9 +305,9 @@ void setPixelQuintupletOutputBranches(Event* event) { for (unsigned int pT5 = 0; pT5 < nPixelQuintuplets; pT5++) { unsigned int T5Index = getT5FrompT5(event, pT5); unsigned int pLSIndex = getPixelLSFrompT5(event, pT5); - float pt = (__H2F(quintuplets->innerRadius[T5Index]) * k2Rinv1GeVf * 2 + segments->ptIn[pLSIndex]) / 2; - float eta = segments->eta[pLSIndex]; - float phi = segments->phi[pLSIndex]; + float pt = (__H2F(quintuplets->innerRadius[T5Index]) * k2Rinv1GeVf * 2 + segmentsPixel.ptIn()[pLSIndex]) / 2; + float eta = segmentsPixel.eta()[pLSIndex]; + float phi = segmentsPixel.phi()[pLSIndex]; std::vector hit_idx = getHitIdxsFrompT5(event, pT5); std::vector module_idx = getModuleIdxsFrompT5(event, pT5); @@ -439,7 +439,7 @@ void setQuintupletOutputBranches(Event* event) { void setPixelTripletOutputBranches(Event* event) { PixelTriplets const* pixelTriplets = event->getPixelTriplets().data(); Modules const* modules = event->getModules().data(); - Segments const* segments = event->getSegments().data(); + SegmentsPixelConst segmentsPixel = event->getSegments(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); unsigned int nPixelTriplets = *pixelTriplets->nPixelTriplets; @@ -449,10 +449,10 @@ void setPixelTripletOutputBranches(Event* event) { for (unsigned int pT3 = 0; pT3 < nPixelTriplets; pT3++) { unsigned int T3Index = getT3FrompT3(event, pT3); unsigned int pLSIndex = getPixelLSFrompT3(event, pT3); - const float pt = segments->ptIn[pLSIndex]; + const float pt = segmentsPixel.ptIn()[pLSIndex]; - float eta = segments->eta[pLSIndex]; - float phi = segments->phi[pLSIndex]; + float eta = segmentsPixel.eta()[pLSIndex]; + float phi = segmentsPixel.phi()[pLSIndex]; std::vector hit_idx = getHitIdxsFrompT3(event, pT3); std::vector hit_type = getHitTypesFrompT3(event, pT3); @@ -501,7 +501,7 @@ void setPixelTripletOutputBranches(Event* event) { //________________________________________________________________________________________________________________________________ void setGnnNtupleBranches(Event* event) { // Get relevant information - Segments const* segments = event->getSegments().data(); + SegmentsOccupancyConst segmentsOccupancy = event->getSegments(); MiniDoubletsOccupancyConst miniDoublets = event->getMiniDoublets(); Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); @@ -517,7 +517,7 @@ void setGnnNtupleBranches(Event* event) { unsigned int nTotalLS = 0; for (unsigned int idx = 0; idx < *(modules->nLowerModules); ++idx) { nTotalMD += miniDoublets.nMDs()[idx]; - nTotalLS += segments->nSegments[idx]; + nTotalLS += segmentsOccupancy.nSegments()[idx]; } std::set lss_used_in_true_tc; @@ -556,7 +556,7 @@ void setGnnNtupleBranches(Event* event) { // } // Loop over segments - for (unsigned int jdx = 0; jdx < segments->nSegments[idx]; jdx++) { + for (unsigned int jdx = 0; jdx < segmentsOccupancy.nSegments()[idx]; jdx++) { // Get the actual index to the segments using ranges unsigned int sgIdx = ranges->segmentModuleIndices[idx] + jdx; @@ -746,7 +746,7 @@ std::tuple, std::vectorgetTrackCandidates().const_view(); Quintuplets const* quintuplets = event->getQuintuplets().data(); - Segments const* segments = event->getSegments().data(); + SegmentsPixelConst segmentsPixel = event->getSegments(); // // pictorial representation of a pT5 @@ -839,9 +839,9 @@ std::tuple, std::vectorptIn[pLS]; - const float eta_pLS = segments->eta[pLS]; - const float phi_pLS = segments->phi[pLS]; + const float pt_pLS = segmentsPixel.ptIn()[pLS]; + const float eta_pLS = segmentsPixel.eta()[pLS]; + const float phi_pLS = segmentsPixel.phi()[pLS]; float pt_T5 = __H2F(quintuplets->innerRadius[T5Index]) * 2 * k2Rinv1GeVf; const float pt = (pt_T5 + pt_pLS) / 2; @@ -858,7 +858,7 @@ std::tuple, std::vectorgetTrackCandidates().const_view(); Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); + SegmentsPixelConst segmentsPixel = event->getSegments(); // // pictorial representation of a pT3 @@ -872,9 +872,9 @@ std::tuple, std::vectorptIn[pLS]; - const float eta_pLS = segments->eta[pLS]; - const float phi_pLS = segments->phi[pLS]; + const float pt_pLS = segmentsPixel.ptIn()[pLS]; + const float eta_pLS = segmentsPixel.eta()[pLS]; + const float phi_pLS = segmentsPixel.phi()[pLS]; float pt_T3 = triplets->circleRadius[T3] * 2 * k2Rinv1GeVf; // average pt @@ -925,15 +925,15 @@ std::tuple, std::vector, std::vector> parsepLS(Event* event, unsigned int idx) { auto const& trackCandidates = event->getTrackCandidates().const_view(); - Segments const* segments = event->getSegments().data(); + SegmentsPixelConst segmentsPixel = event->getSegments(); // Getting pLS index unsigned int pLS = trackCandidates.directObjectIndices()[idx]; // Getting pt eta and phi - float pt = segments->ptIn[pLS]; - float eta = segments->eta[pLS]; - float phi = segments->phi[pLS]; + float pt = segmentsPixel.ptIn()[pLS]; + float eta = segmentsPixel.eta()[pLS]; + float phi = segmentsPixel.phi()[pLS]; // Getting hit indices and types std::vector hit_idx = getPixelHitIdxsFrompLS(event, pLS); @@ -1009,7 +1009,8 @@ void printMDs(Event* event) { //________________________________________________________________________________________________________________________________ void printLSs(Event* event) { - Segments const* segments = event->getSegments().data(); + SegmentsConst segments = event->getSegments(); + SegmentsOccupancyConst segmentsOccupancy = event->getSegments(); MiniDoubletsConst miniDoublets = event->getMiniDoublets(); Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); @@ -1018,11 +1019,11 @@ void printLSs(Event* event) { int nSegments = 0; for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { unsigned int idx = i; //modules->lowerModuleIndices[i]; - nSegments += segments->nSegments[idx]; - for (unsigned int jdx = 0; jdx < segments->nSegments[idx]; jdx++) { + nSegments += segmentsOccupancy.nSegments()[idx]; + for (unsigned int jdx = 0; jdx < segmentsOccupancy.nSegments()[idx]; jdx++) { unsigned int sgIdx = ranges->segmentModuleIndices[idx] + jdx; - unsigned int InnerMiniDoubletIndex = segments->mdIndices[2 * sgIdx]; - unsigned int OuterMiniDoubletIndex = segments->mdIndices[2 * sgIdx + 1]; + unsigned int InnerMiniDoubletIndex = segments.mdIndices()[sgIdx][0]; + unsigned int OuterMiniDoubletIndex = segments.mdIndices()[sgIdx][1]; unsigned int InnerMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[InnerMiniDoubletIndex]; unsigned int InnerMiniDoubletUpperHitIndex = miniDoublets.outerHitIndices()[InnerMiniDoubletIndex]; unsigned int OuterMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[OuterMiniDoubletIndex]; @@ -1041,7 +1042,8 @@ void printLSs(Event* event) { //________________________________________________________________________________________________________________________________ void printpLSs(Event* event) { - Segments const* segments = event->getSegments().data(); + SegmentsConst segments = event->getSegments(); + SegmentsOccupancyConst segmentsOccupancy = event->getSegments(); MiniDoubletsConst miniDoublets = event->getMiniDoublets(); Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); @@ -1049,11 +1051,11 @@ void printpLSs(Event* event) { unsigned int i = *(modules->nLowerModules); unsigned int idx = i; //modules->lowerModuleIndices[i]; - int npLS = segments->nSegments[idx]; - for (unsigned int jdx = 0; jdx < segments->nSegments[idx]; jdx++) { + int npLS = segmentsOccupancy.nSegments()[idx]; + for (unsigned int jdx = 0; jdx < segmentsOccupancy.nSegments()[idx]; jdx++) { unsigned int sgIdx = ranges->segmentModuleIndices[idx] + jdx; - unsigned int InnerMiniDoubletIndex = segments->mdIndices[2 * sgIdx]; - unsigned int OuterMiniDoubletIndex = segments->mdIndices[2 * sgIdx + 1]; + unsigned int InnerMiniDoubletIndex = segments.mdIndices()[sgIdx][0]; + unsigned int OuterMiniDoubletIndex = segments.mdIndices()[sgIdx][1]; unsigned int InnerMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[InnerMiniDoubletIndex]; unsigned int InnerMiniDoubletUpperHitIndex = miniDoublets.outerHitIndices()[InnerMiniDoubletIndex]; unsigned int OuterMiniDoubletLowerHitIndex = miniDoublets.anchorHitIndices()[OuterMiniDoubletIndex]; @@ -1072,7 +1074,7 @@ void printpLSs(Event* event) { //________________________________________________________________________________________________________________________________ void printT3s(Event* event) { Triplets const* triplets = event->getTriplets().data(); - Segments const* segments = event->getSegments().data(); + SegmentsConst segments = event->getSegments(); MiniDoubletsConst miniDoublets = event->getMiniDoublets(); Hits const* hitsEvt = event->getHits().data(); Modules const* modules = event->getModules().data(); @@ -1085,9 +1087,9 @@ void printT3s(Event* event) { unsigned int tpIdx = idx * 5000 + jdx; unsigned int InnerSegmentIndex = triplets->segmentIndices[2 * tpIdx]; unsigned int OuterSegmentIndex = triplets->segmentIndices[2 * tpIdx + 1]; - unsigned int InnerSegmentInnerMiniDoubletIndex = segments->mdIndices[2 * InnerSegmentIndex]; - unsigned int InnerSegmentOuterMiniDoubletIndex = segments->mdIndices[2 * InnerSegmentIndex + 1]; - unsigned int OuterSegmentOuterMiniDoubletIndex = segments->mdIndices[2 * OuterSegmentIndex + 1]; + unsigned int InnerSegmentInnerMiniDoubletIndex = segments.mdIndices()[InnerSegmentIndex][0]; + unsigned int InnerSegmentOuterMiniDoubletIndex = segments.mdIndices()[InnerSegmentIndex][1]; + unsigned int OuterSegmentOuterMiniDoubletIndex = segments.mdIndices()[OuterSegmentIndex][1]; unsigned int hit_idx0 = miniDoublets.anchorHitIndices()[InnerSegmentInnerMiniDoubletIndex]; unsigned int hit_idx1 = miniDoublets.outerHitIndices()[InnerSegmentInnerMiniDoubletIndex];