Skip to content

Commit

Permalink
Merge pull request #101 from SegmentLinking/mds_soa
Browse files Browse the repository at this point in the history
Migrate MDs to SoA+PortableCollection
  • Loading branch information
slava77 authored Oct 16, 2024
2 parents 8338254 + d3156d1 commit fc3fc6a
Show file tree
Hide file tree
Showing 14 changed files with 751 additions and 846 deletions.
60 changes: 60 additions & 0 deletions RecoTracker/LSTCore/interface/MiniDoubletsSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#ifndef RecoTracker_LSTCore_interface_MiniDoubletsSoA_h
#define RecoTracker_LSTCore_interface_MiniDoubletsSoA_h

#include "DataFormats/SoATemplate/interface/SoALayout.h"
#include "DataFormats/Portable/interface/PortableCollection.h"

namespace lst {

// Structure-of-arrays layout holding one entry per mini-doublet (MD).
// Every field is declared with SOA_COLUMN, i.e. it is a per-element array;
// there are no scalar members in this layout.
// In Event.dev.cc the device collection is created with nTotalMDs elements
// for this layout.
// NOTE(review): the per-column meanings below are inferred from the column
// names only (anchor hit vs. outer hit, shifted coordinates, edge positions);
// confirm against the kernels that fill them.
GENERATE_SOA_LAYOUT(MiniDoubletsSoALayout,
// Indices of the two hits forming the MD and of the module it belongs to
// (presumably indices into the hits / modules collections - TODO confirm).
SOA_COLUMN(unsigned int, anchorHitIndices),
SOA_COLUMN(unsigned int, outerHitIndices),
SOA_COLUMN(uint16_t, moduleIndices),
// Per-MD float quantities (dphi / dz values and shifted coordinates).
SOA_COLUMN(float, dphichanges),
SOA_COLUMN(float, dzs),
SOA_COLUMN(float, dphis),
SOA_COLUMN(float, shiftedXs),
SOA_COLUMN(float, shiftedYs),
SOA_COLUMN(float, shiftedZs),
SOA_COLUMN(float, noShiftedDphis),
SOA_COLUMN(float, noShiftedDphiChanges),
// Columns prefixed "anchor".
SOA_COLUMN(float, anchorX),
SOA_COLUMN(float, anchorY),
SOA_COLUMN(float, anchorZ),
SOA_COLUMN(float, anchorRt),
SOA_COLUMN(float, anchorPhi),
SOA_COLUMN(float, anchorEta),
SOA_COLUMN(float, anchorHighEdgeX),
SOA_COLUMN(float, anchorHighEdgeY),
SOA_COLUMN(float, anchorLowEdgeX),
SOA_COLUMN(float, anchorLowEdgeY),
SOA_COLUMN(float, anchorLowEdgePhi),
SOA_COLUMN(float, anchorHighEdgePhi),
// Columns prefixed "outer" (note: no edge-phi columns, unlike "anchor").
SOA_COLUMN(float, outerX),
SOA_COLUMN(float, outerY),
SOA_COLUMN(float, outerZ),
SOA_COLUMN(float, outerRt),
SOA_COLUMN(float, outerPhi),
SOA_COLUMN(float, outerEta),
SOA_COLUMN(float, outerHighEdgeX),
SOA_COLUMN(float, outerHighEdgeY),
SOA_COLUMN(float, outerLowEdgeX),
SOA_COLUMN(float, outerLowEdgeY));

// Structure-of-arrays layout with two unsigned counters per entry.
// In Event.dev.cc this layout is sized with nLowerModules_ + 1 elements and
// both columns are zeroed with alpaka::memset right after allocation.
// NOTE(review): presumably one entry per lower module plus one trailing entry
// for the pixel module (Event.dev.cc writes the entry at pixelModuleIndex and
// reads only the first nLowerModules_ entries "exclude pixel part") - confirm.
GENERATE_SOA_LAYOUT(MiniDoubletsOccupancySoALayout,
SOA_COLUMN(unsigned int, nMDs),
SOA_COLUMN(unsigned int, totOccupancyMDs));

// Layout types instantiated with their default template arguments.
using MiniDoubletsSoA = MiniDoubletsSoALayout<>;
using MiniDoubletsOccupancySoA = MiniDoubletsOccupancySoALayout<>;

// Mutable and read-only view types for each layout; these are what the
// kernel launches in Event.dev.cc receive as arguments.
using MiniDoublets = MiniDoubletsSoA::View;
using MiniDoubletsConst = MiniDoubletsSoA::ConstView;
using MiniDoubletsOccupancy = MiniDoubletsOccupancySoA::View;
using MiniDoubletsOccupancyConst = MiniDoubletsOccupancySoA::ConstView;

// Host-side collection bundling both layouts in a single multi-collection.
using MiniDoubletsHostCollection = PortableHostMultiCollection<MiniDoubletsSoA, MiniDoubletsOccupancySoA>;

} // namespace lst

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef RecoTracker_LSTCore_interface_alpaka_MiniDoubletsSoA_h
#define RecoTracker_LSTCore_interface_alpaka_MiniDoubletsSoA_h

#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"

#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
// Backend-specific (device) collection bundling the per-MD layout and the
// occupancy layout, mirroring MiniDoubletsHostCollection on the host side.
// NOTE(review): MiniDoubletsSoA / MiniDoubletsOccupancySoA are declared in
// ::lst, not in this namespace; the unqualified names presumably resolve via
// a using-directive provided by another header - confirm it is reachable
// from this file's includes.
using MiniDoubletsDeviceCollection = PortableCollection2<MiniDoubletsSoA, MiniDoubletsOccupancySoA>;
} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst

#endif
104 changes: 58 additions & 46 deletions RecoTracker/LSTCore/src/alpaka/Event.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ void Event::resetEventSync() {
}
hitsInGPU_.reset();
hitsBuffers_.reset();
mdsInGPU_.reset();
miniDoubletsBuffers_.reset();
miniDoubletsDC_.reset();
rangesInGPU_.reset();
rangesBuffers_.reset();
segmentsInGPU_.reset();
Expand All @@ -71,7 +70,7 @@ void Event::resetEventSync() {

hitsInCPU_.reset();
rangesInCPU_.reset();
mdsInCPU_.reset();
miniDoubletsHC_.reset();
segmentsInCPU_.reset();
tripletsInCPU_.reset();
quintupletsInCPU_.reset();
Expand Down Expand Up @@ -171,7 +170,7 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
unsigned int mdSize = 2 * size;
uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex;

if (!mdsInGPU_) {
if (!miniDoubletsDC_) {
// Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy
auto dst_view_miniDoubletModuleOccupancy =
alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_);
Expand All @@ -194,11 +193,15 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
*nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules;
unsigned int nTotalMDs = *nTotalMDs_buf_h.data();

mdsInGPU_.emplace();
miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_);
mdsInGPU_->setData(*miniDoubletsBuffers_);
std::array<int, 2> const mds_sizes{{static_cast<int>(nTotalMDs), static_cast<int>(nLowerModules_ + 1)}};
miniDoubletsDC_.emplace(mds_sizes, queue_);

alpaka::memcpy(queue_, miniDoubletsBuffers_->nMemoryLocations_buf, nTotalMDs_buf_h);
auto mdsOccupancy = miniDoubletsDC_->view<MiniDoubletsOccupancySoA>();
auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size());
auto totOccupancyMDs_view =
alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size());
alpaka::memset(queue_, nMDs_view, 0u);
alpaka::memset(queue_, totOccupancyMDs_view, 0u);
}
if (!segmentsInGPU_) {
// can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously.
Expand All @@ -211,7 +214,7 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
CreateSegmentArrayRanges{},
*modulesBuffers_.data(),
*rangesInGPU_,
*mdsInGPU_);
miniDoubletsDC_->const_view<MiniDoubletsSoA>());

auto nTotalSegments_view = alpaka::createView(cms::alpakatools::host(), &nTotalSegments_, (Idx)1u);

Expand Down Expand Up @@ -264,11 +267,14 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
alpaka::createSubView(segmentsBuffers_->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size);

auto dst_view_nMDs = alpaka::createSubView(miniDoubletsBuffers_->nMDs_buf, (Idx)1u, (Idx)pixelModuleIndex);
auto mdsOccupancy = miniDoubletsDC_->view<MiniDoubletsOccupancySoA>();
auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), (Idx)mdsOccupancy.metadata().size());
auto dst_view_nMDs = alpaka::createSubView(nMDs_view, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize);

auto dst_view_totOccupancyMDs =
alpaka::createSubView(miniDoubletsBuffers_->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex);
auto totOccupancyMDs_view =
alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), (Idx)mdsOccupancy.metadata().size());
auto dst_view_totOccupancyMDs = alpaka::createSubView(totOccupancyMDs_view, (Idx)1u, (Idx)pixelModuleIndex);
alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize);

alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory
Expand All @@ -283,7 +289,7 @@ void Event::addPixelSegmentToEvent(std::vector<unsigned int> const& hitIndices0,
*modulesBuffers_.data(),
*rangesInGPU_,
*hitsInGPU_,
*mdsInGPU_,
miniDoubletsDC_->view<MiniDoubletsSoA>(),
*segmentsInGPU_,
hitIndices0_dev.data(),
hitIndices1_dev.data(),
Expand Down Expand Up @@ -317,10 +323,16 @@ void Event::createMiniDoublets() {
*nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules;
unsigned int nTotalMDs = *nTotalMDs_buf_h.data();

if (!mdsInGPU_) {
mdsInGPU_.emplace();
miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_);
mdsInGPU_->setData(*miniDoubletsBuffers_);
if (!miniDoubletsDC_) {
std::array<int, 2> const mds_sizes{{static_cast<int>(nTotalMDs), static_cast<int>(nLowerModules_ + 1)}};
miniDoubletsDC_.emplace(mds_sizes, queue_);

auto mdsOccupancy = miniDoubletsDC_->view<MiniDoubletsOccupancySoA>();
auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size());
auto totOccupancyMDs_view =
alpaka::createView(devAcc_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size());
alpaka::memset(queue_, nMDs_view, 0u);
alpaka::memset(queue_, totOccupancyMDs_view, 0u);
}

Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32};
Expand All @@ -333,7 +345,8 @@ void Event::createMiniDoublets() {
CreateMiniDoubletsInGPUv2{},
*modulesBuffers_.data(),
*hitsInGPU_,
*mdsInGPU_,
miniDoubletsDC_->view<MiniDoubletsSoA>(),
miniDoubletsDC_->view<MiniDoubletsOccupancySoA>(),
*rangesInGPU_);

WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv<Vec1D>({1}, {1024}, {1});
Expand All @@ -342,7 +355,7 @@ void Event::createMiniDoublets() {
addMiniDoubletRangesToEventExplicit_workDiv,
AddMiniDoubletRangesToEventExplicit{},
*modulesBuffers_.data(),
*mdsInGPU_,
miniDoubletsDC_->view<MiniDoubletsOccupancySoA>(),
*rangesInGPU_,
*hitsInGPU_);

Expand All @@ -367,7 +380,8 @@ void Event::createSegmentsWithModuleMap() {
createSegmentsInGPUv2_workDiv,
CreateSegmentsInGPUv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
miniDoubletsDC_->const_view<MiniDoubletsOccupancySoA>(),
*segmentsInGPU_,
*rangesInGPU_);

Expand Down Expand Up @@ -453,7 +467,7 @@ void Event::createTriplets() {
createTripletsInGPUv2_workDiv,
CreateTripletsInGPUv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*rangesInGPU_,
Expand Down Expand Up @@ -572,7 +586,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) {
*pixelTripletsInGPU_,
trackCandidatesDC_->view(),
*segmentsInGPU_,
*mdsInGPU_,
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
*hitsInGPU_,
*quintupletsInGPU_);

Expand Down Expand Up @@ -707,7 +721,7 @@ void Event::createPixelTriplets() {
CreatePixelTripletsInGPUFromMapv2{},
*modulesBuffers_.data(),
*rangesInGPU_,
*mdsInGPU_,
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*pixelTripletsInGPU_,
Expand Down Expand Up @@ -772,7 +786,7 @@ void Event::createQuintuplets() {
createQuintupletsInGPUv2_workDiv,
CreateQuintupletsInGPUv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*quintupletsInGPU_,
Expand Down Expand Up @@ -907,7 +921,7 @@ void Event::createPixelQuintuplets() {
createPixelQuintupletsInGPUFromMapv2_workDiv,
CreatePixelQuintupletsInGPUFromMapv2{},
*modulesBuffers_.data(),
*mdsInGPU_,
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
*segmentsInGPU_,
*tripletsInGPU_,
*quintupletsInGPU_,
Expand Down Expand Up @@ -950,7 +964,9 @@ void Event::createPixelQuintuplets() {

void Event::addMiniDoubletsToEventExplicit() {
auto nMDsCPU_buf = allocBufWrapper<unsigned int>(cms::alpakatools::host(), nLowerModules_, queue_);
alpaka::memcpy(queue_, nMDsCPU_buf, miniDoubletsBuffers_->nMDs_buf, nLowerModules_);
auto mdsOccupancy = miniDoubletsDC_->const_view<MiniDoubletsOccupancySoA>();
auto nMDs_view = alpaka::createView(devAcc_, mdsOccupancy.nMDs(), nLowerModules_); // exclude pixel part
alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_);

// FIXME: replace by ES host data
auto module_subdets_buf = allocBufWrapper<short>(cms::alpakatools::host(), nLowerModules_, queue_);
Expand Down Expand Up @@ -1333,28 +1349,24 @@ ObjectRangesBuffer<alpaka_common::DevHost>& Event::getRanges(bool sync) {
return rangesInCPU_.value();
}

MiniDoubletsBuffer<alpaka_common::DevHost>& Event::getMiniDoublets(bool sync) {
if (!mdsInCPU_) {
// Get nMemoryLocations parameter to initialize host based mdsInCPU_
auto nMemHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);
alpaka::memcpy(queue_, nMemHost_buf_h, miniDoubletsBuffers_->nMemoryLocations_buf);
alpaka::wait(queue_); // wait for the value before using

auto const nMemHost = *nMemHost_buf_h.data();
mdsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_);
mdsInCPU_->setData(*mdsInCPU_);

alpaka::memcpy(queue_, mdsInCPU_->nMemoryLocations_buf, miniDoubletsBuffers_->nMemoryLocations_buf);
alpaka::memcpy(queue_, mdsInCPU_->anchorHitIndices_buf, miniDoubletsBuffers_->anchorHitIndices_buf, nMemHost);
alpaka::memcpy(queue_, mdsInCPU_->outerHitIndices_buf, miniDoubletsBuffers_->outerHitIndices_buf, nMemHost);
alpaka::memcpy(queue_, mdsInCPU_->dphichanges_buf, miniDoubletsBuffers_->dphichanges_buf, nMemHost);
alpaka::memcpy(queue_, mdsInCPU_->nMDs_buf, miniDoubletsBuffers_->nMDs_buf);
alpaka::memcpy(queue_, mdsInCPU_->totOccupancyMDs_buf, miniDoubletsBuffers_->totOccupancyMDs_buf);
if (sync)
alpaka::wait(queue_); // host consumers expect filled data
// Returns a read-only view of the requested mini-doublet layout.
//
// TSoA selects which layout of the multi-collection is viewed
// (MiniDoubletsSoA or MiniDoubletsOccupancySoA, see the explicit
// instantiations below). TDev is the device type the data lives on.
//
// sync: when true, waits on queue_ after scheduling the device-to-host copy.
//
// Both branches dereference miniDoubletsDC_ without checking that the
// optional is engaged, so the device collection must already exist.
template <typename TSoA, typename TDev>
typename TSoA::ConstView Event::getMiniDoublets(bool sync) {
if constexpr (std::is_same_v<TDev, DevHost>) {
// Data already lives on the host: view the device collection directly,
// no copy and no wait.
return miniDoubletsDC_->const_view<TSoA>();
} else {
// The host copy is created on the first call only and cached in
// miniDoubletsHC_; later calls reuse it.
if (!miniDoubletsHC_) {
miniDoubletsHC_.emplace(
cms::alpakatools::CopyToHost<
PortableMultiCollection<TDev, MiniDoubletsSoA, MiniDoubletsOccupancySoA>>::copyAsync(queue_,
*miniDoubletsDC_));
// NOTE(review): the wait is only issued by the call that creates the host
// copy. A call with sync == false followed by one with sync == true does
// not wait here - confirm callers do not rely on that.
if (sync)
alpaka::wait(queue_); // host consumers expect filled data
}
return miniDoubletsHC_->const_view<TSoA>();
}
// NOTE(review): both branches above return, so this statement is not reached;
// mdsInCPU_ looks like a leftover of the previous buffer-based implementation
// shown in this diff view - confirm it is absent from the committed file.
return mdsInCPU_.value();
}
// Explicit instantiations for the two layouts; TDev is left to its default
// template argument (Device, per the declaration in Event.h).
template MiniDoubletsConst Event::getMiniDoublets<MiniDoubletsSoA>(bool);
template MiniDoubletsOccupancyConst Event::getMiniDoublets<MiniDoubletsOccupancySoA>(bool);

SegmentsBuffer<alpaka_common::DevHost>& Event::getSegments(bool sync) {
if (!segmentsInCPU_) {
Expand Down
8 changes: 4 additions & 4 deletions RecoTracker/LSTCore/src/alpaka/Event.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
std::optional<ObjectRangesBuffer<Device>> rangesBuffers_;
std::optional<Hits> hitsInGPU_;
std::optional<HitsBuffer<Device>> hitsBuffers_;
std::optional<MiniDoublets> mdsInGPU_;
std::optional<MiniDoubletsBuffer<Device>> miniDoubletsBuffers_;
std::optional<MiniDoubletsDeviceCollection> miniDoubletsDC_;
std::optional<Segments> segmentsInGPU_;
std::optional<SegmentsBuffer<Device>> segmentsBuffers_;
std::optional<Triplets> tripletsInGPU_;
Expand All @@ -64,7 +63,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
//CPU interface stuff
std::optional<ObjectRangesBuffer<DevHost>> rangesInCPU_;
std::optional<HitsBuffer<DevHost>> hitsInCPU_;
std::optional<MiniDoubletsBuffer<DevHost>> mdsInCPU_;
std::optional<MiniDoubletsHostCollection> miniDoubletsHC_;
std::optional<SegmentsBuffer<DevHost>> segmentsInCPU_;
std::optional<TripletsBuffer<DevHost>> tripletsInCPU_;
std::optional<TrackCandidatesHostCollection> trackCandidatesHC_;
Expand Down Expand Up @@ -183,7 +182,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
HitsBuffer<DevHost>& getHits(bool sync = true);
HitsBuffer<DevHost>& getHitsInCMSSW(bool sync = true);
ObjectRangesBuffer<DevHost>& getRanges(bool sync = true);
MiniDoubletsBuffer<DevHost>& getMiniDoublets(bool sync = true);
template <typename TSoA, typename TDev = Device>
typename TSoA::ConstView getMiniDoublets(bool sync = true);
SegmentsBuffer<DevHost>& getSegments(bool sync = true);
TripletsBuffer<DevHost>& getTriplets(bool sync = true);
QuintupletsBuffer<DevHost>& getQuintuplets(bool sync = true);
Expand Down
Loading

0 comments on commit fc3fc6a

Please sign in to comment.