diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index f0c914a41d5e3..659591b836ec9 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -10,8 +10,8 @@ using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; void Event::initSync(bool verbose) { - alpaka::wait(queue); // other calls can be asynchronous - addObjects = verbose; + alpaka::wait(queue_); // other calls can be asynchronous + addObjects_ = verbose; //reset the arrays for (int i = 0; i < 6; i++) { @@ -33,7 +33,7 @@ void Event::initSync(bool verbose) { } void Event::resetEventSync() { - alpaka::wait(queue); // synchronize to reset consistently + alpaka::wait(queue_); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { n_hits_by_layer_barrel_[i] = 0; @@ -51,35 +51,35 @@ void Event::resetEventSync() { n_quintuplets_by_layer_endcap_[i] = 0; } } - hitsInGPU.reset(); - hitsBuffers.reset(); - mdsInGPU.reset(); - miniDoubletsBuffers.reset(); - rangesInGPU.reset(); - rangesBuffers.reset(); - segmentsInGPU.reset(); - segmentsBuffers.reset(); - tripletsInGPU.reset(); - tripletsBuffers.reset(); - quintupletsInGPU.reset(); - quintupletsBuffers.reset(); - trackCandidatesInGPU.reset(); - trackCandidatesBuffers.reset(); - pixelTripletsInGPU.reset(); - pixelTripletsBuffers.reset(); - pixelQuintupletsInGPU.reset(); - pixelQuintupletsBuffers.reset(); - - hitsInCPU.reset(); - rangesInCPU.reset(); - mdsInCPU.reset(); - segmentsInCPU.reset(); - tripletsInCPU.reset(); - quintupletsInCPU.reset(); - pixelTripletsInCPU.reset(); - pixelQuintupletsInCPU.reset(); - trackCandidatesInCPU.reset(); - modulesInCPU.reset(); + hitsInGPU_.reset(); + hitsBuffers_.reset(); + mdsInGPU_.reset(); + miniDoubletsBuffers_.reset(); + rangesInGPU_.reset(); + rangesBuffers_.reset(); + segmentsInGPU_.reset(); + segmentsBuffers_.reset(); + tripletsInGPU_.reset(); + tripletsBuffers_.reset(); + quintupletsInGPU_.reset(); + quintupletsBuffers_.reset(); + trackCandidatesInGPU_.reset(); + trackCandidatesBuffers_.reset(); + pixelTripletsInGPU_.reset(); + pixelTripletsBuffers_.reset(); + pixelQuintupletsInGPU_.reset(); + pixelQuintupletsBuffers_.reset(); + + hitsInCPU_.reset(); + rangesInCPU_.reset(); + mdsInCPU_.reset(); + segmentsInCPU_.reset(); + tripletsInCPU_.reset(); + quintupletsInCPU_.reset(); + pixelTripletsInCPU_.reset(); + pixelQuintupletsInCPU_.reset(); + trackCandidatesInCPU_.reset(); + modulesInCPU_.reset(); } void Event::addHitToEvent(std::vector const& x, @@ -91,35 +91,35 @@ void Event::addHitToEvent(std::vector const& x, unsigned int nHits = x.size(); // Initialize space on device/host for next event. - if (!hitsInGPU) { - hitsInGPU.emplace(); - hitsBuffers.emplace(nModules_, nHits, devAcc, queue); - hitsInGPU->setData(*hitsBuffers); + if (!hitsInGPU_) { + hitsInGPU_.emplace(); + hitsBuffers_.emplace(nModules_, nHits, devAcc_, queue_); + hitsInGPU_->setData(*hitsBuffers_); } - if (!rangesInGPU) { - rangesInGPU.emplace(); - rangesBuffers.emplace(nModules_, nLowerModules_, devAcc, queue); - rangesInGPU->setData(*rangesBuffers); + if (!rangesInGPU_) { + rangesInGPU_.emplace(); + rangesBuffers_.emplace(nModules_, nLowerModules_, devAcc_, queue_); + rangesInGPU_->setData(*rangesBuffers_); } // Need a view here before transferring to the device. - auto nHits_view = alpaka::createView(devHost, &nHits, (Idx)1u); + auto nHits_view = alpaka::createView(cms::alpakatools::host(), &nHits, (Idx)1u); // Copy the host arrays to the GPU. - alpaka::memcpy(queue, hitsBuffers->xs_buf, x, nHits); - alpaka::memcpy(queue, hitsBuffers->ys_buf, y, nHits); - alpaka::memcpy(queue, hitsBuffers->zs_buf, z, nHits); - alpaka::memcpy(queue, hitsBuffers->detid_buf, detId, nHits); - alpaka::memcpy(queue, hitsBuffers->idxs_buf, idxInNtuple, nHits); - alpaka::memcpy(queue, hitsBuffers->nHits_buf, nHits_view); - alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory + alpaka::memcpy(queue_, hitsBuffers_->xs_buf, x, nHits); + alpaka::memcpy(queue_, hitsBuffers_->ys_buf, y, nHits); + alpaka::memcpy(queue_, hitsBuffers_->zs_buf, z, nHits); + alpaka::memcpy(queue_, hitsBuffers_->detid_buf, detId, nHits); + alpaka::memcpy(queue_, hitsBuffers_->idxs_buf, idxInNtuple, nHits); + alpaka::memcpy(queue_, hitsBuffers_->nHits_buf, nHits_view); + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; Vec3D const blocksPerGrid1{1, 1, max_blocks}; WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, hit_loop_workdiv, HitLoopKernel{}, Endcap, @@ -129,7 +129,7 @@ void Event::addHitToEvent(std::vector const& x, endcapGeometryBuffers_.geoMapDetId_buf.data(), endcapGeometryBuffers_.geoMapPhi_buf.data(), *modulesBuffers_.data(), - *hitsInGPU, + *hitsInGPU_, nHits); Vec3D const threadsPerBlock2{1, 1, 256}; @@ -137,7 +137,7 @@ void Event::addHitToEvent(std::vector const& x, WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); alpaka::exec( - queue, module_ranges_workdiv, ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); + queue_, module_ranges_workdiv, ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU_, nLowerModules_); } void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, @@ -172,120 +172,120 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, unsigned int mdSize = 2 * size; uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex; - if (!mdsInGPU) { - // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy + if (!mdsInGPU_) { + // Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy auto dst_view_miniDoubletModuleOccupancy = - alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); + alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device - auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); + queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU_); - auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); // wait to get the data before manipulation + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + alpaka::memcpy(queue_, nTotalMDs_buf_h, rangesBuffers_->device_nTotalMDs_buf); + alpaka::wait(queue_); // wait to get the data before manipulation *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - mdsInGPU.emplace(); - miniDoubletsBuffers.emplace(nTotalMDs, nLowerModules_, devAcc, queue); - mdsInGPU->setData(*miniDoubletsBuffers); + mdsInGPU_.emplace(); + miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_); + mdsInGPU_->setData(*miniDoubletsBuffers_); - alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); + alpaka::memcpy(queue_, miniDoubletsBuffers_->nMemoryLocations_buf, nTotalMDs_buf_h); } - if (!segmentsInGPU) { + if (!segmentsInGPU_) { // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, createSegmentArrayRanges_workDiv, CreateSegmentArrayRanges{}, *modulesBuffers_.data(), - *rangesInGPU, - *mdsInGPU); + *rangesInGPU_, + *mdsInGPU_); - auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments_, (Idx)1u); + auto nTotalSegments_view = alpaka::createView(cms::alpakatools::host(), &nTotalSegments_, (Idx)1u); - alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); - alpaka::wait(queue); // wait to get the value before manipulation + alpaka::memcpy(queue_, nTotalSegments_view, rangesBuffers_->device_nTotalSegs_buf); + alpaka::wait(queue_); // wait to get the value before manipulation nTotalSegments_ += n_max_pixel_segments_per_module; - segmentsInGPU.emplace(); - segmentsBuffers.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); - segmentsInGPU->setData(*segmentsBuffers); + segmentsInGPU_.emplace(); + segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); + segmentsInGPU_->setData(*segmentsBuffers_); - alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); + alpaka::memcpy(queue_, segmentsBuffers_->nMemoryLocations_buf, nTotalSegments_view); } - auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices1_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices2_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices3_dev = allocBufWrapper(devAcc, size, queue); - auto dPhiChange_dev = allocBufWrapper(devAcc, size, queue); - - alpaka::memcpy(queue, hitIndices0_dev, hitIndices0, size); - alpaka::memcpy(queue, hitIndices1_dev, hitIndices1, size); - alpaka::memcpy(queue, hitIndices2_dev, hitIndices2, size); - alpaka::memcpy(queue, hitIndices3_dev, hitIndices3, size); - alpaka::memcpy(queue, dPhiChange_dev, dPhiChange, size); - - alpaka::memcpy(queue, segmentsBuffers->ptIn_buf, ptIn, size); - alpaka::memcpy(queue, segmentsBuffers->ptErr_buf, ptErr, size); - alpaka::memcpy(queue, segmentsBuffers->px_buf, px, size); - alpaka::memcpy(queue, segmentsBuffers->py_buf, py, size); - alpaka::memcpy(queue, segmentsBuffers->pz_buf, pz, size); - alpaka::memcpy(queue, segmentsBuffers->etaErr_buf, etaErr, size); - alpaka::memcpy(queue, segmentsBuffers->isQuad_buf, isQuad, size); - alpaka::memcpy(queue, segmentsBuffers->eta_buf, eta, size); - alpaka::memcpy(queue, segmentsBuffers->phi_buf, phi, size); - alpaka::memcpy(queue, segmentsBuffers->charge_buf, charge, size); - alpaka::memcpy(queue, segmentsBuffers->seedIdx_buf, seedIdx, size); - alpaka::memcpy(queue, segmentsBuffers->superbin_buf, superbin, size); - alpaka::memcpy(queue, segmentsBuffers->pixelType_buf, pixelType, size); + auto hitIndices0_dev = allocBufWrapper(devAcc_, size, queue_); + auto hitIndices1_dev = allocBufWrapper(devAcc_, size, queue_); + auto hitIndices2_dev = allocBufWrapper(devAcc_, size, queue_); + auto hitIndices3_dev = allocBufWrapper(devAcc_, size, queue_); + auto dPhiChange_dev = allocBufWrapper(devAcc_, size, queue_); + + alpaka::memcpy(queue_, hitIndices0_dev, hitIndices0, size); + alpaka::memcpy(queue_, hitIndices1_dev, hitIndices1, size); + alpaka::memcpy(queue_, hitIndices2_dev, hitIndices2, size); + alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size); + alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size); + + alpaka::memcpy(queue_, segmentsBuffers_->ptIn_buf, ptIn, size); + alpaka::memcpy(queue_, segmentsBuffers_->ptErr_buf, ptErr, size); + alpaka::memcpy(queue_, segmentsBuffers_->px_buf, px, size); + alpaka::memcpy(queue_, segmentsBuffers_->py_buf, py, size); + alpaka::memcpy(queue_, segmentsBuffers_->pz_buf, pz, size); + alpaka::memcpy(queue_, segmentsBuffers_->etaErr_buf, etaErr, size); + alpaka::memcpy(queue_, segmentsBuffers_->isQuad_buf, isQuad, size); + alpaka::memcpy(queue_, segmentsBuffers_->eta_buf, eta, size); + alpaka::memcpy(queue_, segmentsBuffers_->phi_buf, phi, size); + alpaka::memcpy(queue_, segmentsBuffers_->charge_buf, charge, size); + alpaka::memcpy(queue_, segmentsBuffers_->seedIdx_buf, seedIdx, size); + alpaka::memcpy(queue_, segmentsBuffers_->superbin_buf, superbin, size); + alpaka::memcpy(queue_, segmentsBuffers_->pixelType_buf, pixelType, size); // Create source views for size and mdSize - auto src_view_size = alpaka::createView(devHost, &size, (Idx)1u); - auto src_view_mdSize = alpaka::createView(devHost, &mdSize, (Idx)1u); + auto src_view_size = alpaka::createView(cms::alpakatools::host(), &size, (Idx)1u); + auto src_view_mdSize = alpaka::createView(cms::alpakatools::host(), &mdSize, (Idx)1u); - auto dst_view_segments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)pixelModuleIndex); - alpaka::memcpy(queue, dst_view_segments, src_view_size); + auto dst_view_segments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_segments, src_view_size); auto dst_view_totOccupancySegments = - alpaka::createSubView(segmentsBuffers->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex); - alpaka::memcpy(queue, dst_view_totOccupancySegments, src_view_size); + alpaka::createSubView(segmentsBuffers_->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size); - auto dst_view_nMDs = alpaka::createSubView(miniDoubletsBuffers->nMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); - alpaka::memcpy(queue, dst_view_nMDs, src_view_mdSize); + auto dst_view_nMDs = alpaka::createSubView(miniDoubletsBuffers_->nMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize); auto dst_view_totOccupancyMDs = - alpaka::createSubView(miniDoubletsBuffers->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); - alpaka::memcpy(queue, dst_view_totOccupancyMDs, src_view_mdSize); + alpaka::createSubView(miniDoubletsBuffers_->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize); - alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; Vec3D const blocksPerGrid{1, 1, max_blocks}; WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, addPixelSegmentToEvent_workdiv, AddPixelSegmentToEventKernel{}, *modulesBuffers_.data(), - *rangesInGPU, - *hitsInGPU, - *mdsInGPU, - *segmentsInGPU, + *rangesInGPU_, + *hitsInGPU_, + *mdsInGPU_, + *segmentsInGPU_, hitIndices0_dev.data(), hitIndices1_dev.data(), hitIndices2_dev.data(), @@ -296,32 +296,32 @@ void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, } void Event::createMiniDoublets() { - // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy + // Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy auto dst_view_miniDoubletModuleOccupancy = - alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); + alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device - auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); + queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU_); - auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); // wait to get the data before manipulation + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + alpaka::memcpy(queue_, nTotalMDs_buf_h, rangesBuffers_->device_nTotalMDs_buf); + alpaka::wait(queue_); // wait to get the data before manipulation *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - if (!mdsInGPU) { - mdsInGPU.emplace(); - miniDoubletsBuffers.emplace(nTotalMDs, nLowerModules_, devAcc, queue); - mdsInGPU->setData(*miniDoubletsBuffers); + if (!mdsInGPU_) { + mdsInGPU_.emplace(); + miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_); + mdsInGPU_->setData(*miniDoubletsBuffers_); } Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; @@ -329,34 +329,34 @@ void Event::createMiniDoublets() { WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, createMiniDoubletsInGPUv2_workDiv, CreateMiniDoubletsInGPUv2{}, *modulesBuffers_.data(), - *hitsInGPU, - *mdsInGPU, - *rangesInGPU); + *hitsInGPU_, + *mdsInGPU_, + *rangesInGPU_); WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, addMiniDoubletRangesToEventExplicit_workDiv, AddMiniDoubletRangesToEventExplicit{}, *modulesBuffers_.data(), - *mdsInGPU, - *rangesInGPU, - *hitsInGPU); + *mdsInGPU_, + *rangesInGPU_, + *hitsInGPU_); - if (addObjects) { + if (addObjects_) { addMiniDoubletsToEventExplicit(); } } void Event::createSegmentsWithModuleMap() { - if (!segmentsInGPU) { - segmentsInGPU.emplace(); - segmentsBuffers.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); - segmentsInGPU->setData(*segmentsBuffers); + if (!segmentsInGPU_) { + segmentsInGPU_.emplace(); + segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); + segmentsInGPU_->setData(*segmentsBuffers_); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; @@ -364,71 +364,71 @@ void Event::createSegmentsWithModuleMap() { WorkDiv3D const createSegmentsInGPUv2_workDiv = createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, createSegmentsInGPUv2_workDiv, CreateSegmentsInGPUv2{}, *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *rangesInGPU); + *mdsInGPU_, + *segmentsInGPU_, + *rangesInGPU_); WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, addSegmentRangesToEventExplicit_workDiv, AddSegmentRangesToEventExplicit{}, *modulesBuffers_.data(), - *segmentsInGPU, - *rangesInGPU); + *segmentsInGPU_, + *rangesInGPU_); - if (addObjects) { + if (addObjects_) { addSegmentsToEventExplicit(); } } void Event::createTriplets() { - if (!tripletsInGPU) { + if (!tripletsInGPU_) { WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, createTripletArrayRanges_workDiv, CreateTripletArrayRanges{}, *modulesBuffers_.data(), - *rangesInGPU, - *segmentsInGPU); + *rangesInGPU_, + *segmentsInGPU_); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? - auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); - alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); - alpaka::wait(queue); // wait to get the value before using it + alpaka::memcpy(queue_, maxTriplets_buf_h, rangesBuffers_->device_nTotalTrips_buf); + alpaka::wait(queue_); // wait to get the value before using it - tripletsInGPU.emplace(); - tripletsBuffers.emplace(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); - tripletsInGPU->setData(*tripletsBuffers); + tripletsInGPU_.emplace(); + tripletsBuffers_.emplace(*maxTriplets_buf_h.data(), nLowerModules_, devAcc_, queue_); + tripletsInGPU_->setData(*tripletsBuffers_); - alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); + alpaka::memcpy(queue_, tripletsBuffers_->nMemoryLocations_buf, maxTriplets_buf_h); } uint16_t nonZeroModules = 0; unsigned int max_InnerSeg = 0; // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) - auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); - alpaka::memcpy(queue, nSegments_buf_h, segmentsBuffers->nSegments_buf, nLowerModules_); + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, nSegments_buf_h, segmentsBuffers_->nSegments_buf, nLowerModules_); // ... same for module_nConnectedModules // FIXME: replace by ES host data - auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); - alpaka::memcpy(queue, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - alpaka::wait(queue); // wait for nSegments and module_nConnectedModules before using + alpaka::wait(queue_); // wait for nSegments and module_nConnectedModules before using auto const* nSegments = nSegments_buf_h.data(); auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); // Allocate host index and fill it directly - auto index_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + auto index_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); auto* index = index_buf_h.data(); for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { @@ -442,44 +442,44 @@ void Event::createTriplets() { } // Allocate and copy to device index - auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); - alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); + auto index_gpu_buf = allocBufWrapper(devAcc_, nLowerModules_, queue_); + alpaka::memcpy(queue_, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; WorkDiv3D const createTripletsInGPUv2_workDiv = createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, createTripletsInGPUv2_workDiv, CreateTripletsInGPUv2{}, *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *rangesInGPU, + *mdsInGPU_, + *segmentsInGPU_, + *tripletsInGPU_, + *rangesInGPU_, index_gpu_buf.data(), nonZeroModules); WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, addTripletRangesToEventExplicit_workDiv, AddTripletRangesToEventExplicit{}, *modulesBuffers_.data(), - *tripletsInGPU, - *rangesInGPU); + *tripletsInGPU_, + *rangesInGPU_); - if (addObjects) { + if (addObjects_) { addTripletsToEventExplicit(); } } void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { - if (!trackCandidatesInGPU) { - trackCandidatesInGPU.emplace(); - trackCandidatesBuffers.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); - trackCandidatesInGPU->setData(*trackCandidatesBuffers); + if (!trackCandidatesInGPU_) { + trackCandidatesInGPU_.emplace(); + trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); + trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; @@ -487,30 +487,30 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { WorkDiv3D const crossCleanpT3_workDiv = createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, crossCleanpT3_workDiv, CrossCleanpT3{}, *modulesBuffers_.data(), - *rangesInGPU, - *pixelTripletsInGPU, - *segmentsInGPU, - *pixelQuintupletsInGPU); + *rangesInGPU_, + *pixelTripletsInGPU_, + *segmentsInGPU_, + *pixelQuintupletsInGPU_); WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, addpT3asTrackCandidatesInGPU_workDiv, AddpT3asTrackCandidatesInGPU{}, nLowerModules_, - *pixelTripletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *rangesInGPU); + *pixelTripletsInGPU_, + *trackCandidatesInGPU_, + *segmentsInGPU_, + *rangesInGPU_); // Pull nEligibleT5Modules from the device. - auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nEligibleModules_buf_h, rangesBuffers->nEligibleT5Modules_buf); - alpaka::wait(queue); // wait to get the value before using + auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nEligibleModules_buf_h, rangesBuffers_->nEligibleT5Modules_buf); + alpaka::wait(queue_); // wait to get the value before using auto const nEligibleModules = *nEligibleModules_buf_h.data(); Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; @@ -518,38 +518,38 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, removeDupQuintupletsInGPUBeforeTC_workDiv, RemoveDupQuintupletsInGPUBeforeTC{}, - *quintupletsInGPU, - *rangesInGPU); + *quintupletsInGPU_, + *rangesInGPU_); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; WorkDiv3D const crossCleanT5_workDiv = createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, crossCleanT5_workDiv, CrossCleanT5{}, *modulesBuffers_.data(), - *quintupletsInGPU, - *pixelQuintupletsInGPU, - *pixelTripletsInGPU, - *rangesInGPU); + *quintupletsInGPU_, + *pixelQuintupletsInGPU_, + *pixelTripletsInGPU_, + *rangesInGPU_); Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, addT5asTrackCandidateInGPU_workDiv, AddT5asTrackCandidateInGPU{}, nLowerModules_, - *quintupletsInGPU, - *trackCandidatesInGPU, - *rangesInGPU); + *quintupletsInGPU_, + *trackCandidatesInGPU_, + *rangesInGPU_); if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; @@ -557,7 +557,7 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec(queue, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); + alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; @@ -565,41 +565,41 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { WorkDiv3D const crossCleanpLS_workDiv = createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, crossCleanpLS_workDiv, CrossCleanpLS{}, *modulesBuffers_.data(), - *rangesInGPU, - *pixelTripletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *mdsInGPU, - *hitsInGPU, - *quintupletsInGPU); + *rangesInGPU_, + *pixelTripletsInGPU_, + *trackCandidatesInGPU_, + *segmentsInGPU_, + *mdsInGPU_, + *hitsInGPU_, + *quintupletsInGPU_); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, addpLSasTrackCandidateInGPU_workDiv, AddpLSasTrackCandidateInGPU{}, nLowerModules_, - *trackCandidatesInGPU, - *segmentsInGPU, + *trackCandidatesInGPU_, + *segmentsInGPU_, tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached - auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanpT3Host_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanpLSHost_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanT5Host_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanpT5Host_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); - alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); - alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); - alpaka::memcpy(queue, nTrackCanT5Host_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); // wait to get the values before using them + auto nTrackCanpT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTrackCanpT3Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + alpaka::memcpy(queue_, nTrackCanpT5Host_buf, trackCandidatesBuffers_->nTrackCandidatespT5_buf); + alpaka::memcpy(queue_, nTrackCanpT3Host_buf, trackCandidatesBuffers_->nTrackCandidatespT3_buf); + alpaka::memcpy(queue_, nTrackCanpLSHost_buf, trackCandidatesBuffers_->nTrackCandidatespLS_buf); + alpaka::memcpy(queue_, nTrackCanT5Host_buf, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::wait(queue_); // wait to get the values before using them auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); @@ -617,33 +617,33 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { } void Event::createPixelTriplets() { - if (!pixelTripletsInGPU) { - pixelTripletsInGPU.emplace(); - pixelTripletsBuffers.emplace(n_max_pixel_triplets, devAcc, queue); - pixelTripletsInGPU->setData(*pixelTripletsBuffers); + if (!pixelTripletsInGPU_) { + pixelTripletsInGPU_.emplace(); + pixelTripletsBuffers_.emplace(n_max_pixel_triplets, devAcc_, queue_); + pixelTripletsInGPU_->setData(*pixelTripletsBuffers_); } - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); + auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); + alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); auto const* superbins = superbins_buf.data(); auto const* pixelTypes = pixelTypes_buf.data(); unsigned int nInnerSegments; - auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); + auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); // Create a sub-view for the device buffer - auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); + auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); - alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_); + auto connectedPixelIndex_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_); unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); @@ -691,32 +691,32 @@ void Event::createPixelTriplets() { } } - alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); - alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, createPixelTripletsInGPUFromMapv2_workDiv, CreatePixelTripletsInGPUFromMapv2{}, *modulesBuffers_.data(), - *rangesInGPU, - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *pixelTripletsInGPU, + *rangesInGPU_, + *mdsInGPU_, + *segmentsInGPU_, + *tripletsInGPU_, + *pixelTripletsInGPU_, connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), nInnerSegments); #ifdef WARNINGS - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); // wait to get the value before using it + alpaka::memcpy(queue_, nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::wait(queue_); // wait to get the value before using it std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; #endif @@ -729,35 +729,35 @@ void Event::createPixelTriplets() { createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); alpaka::exec( - queue, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU); + queue_, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU_); } void Event::createQuintuplets() { WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, createEligibleModulesListForQuintupletsGPU_workDiv, CreateEligibleModulesListForQuintupletsGPU{}, *modulesBuffers_.data(), - *tripletsInGPU, - *rangesInGPU); + *tripletsInGPU_, + *rangesInGPU_); - auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); - auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); + auto nEligibleT5Modules_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTotalQuintuplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); - alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); - alpaka::wait(queue); // wait for the values before using them + alpaka::memcpy(queue_, nEligibleT5Modules_buf, rangesBuffers_->nEligibleT5Modules_buf); + alpaka::memcpy(queue_, nTotalQuintuplets_buf, rangesBuffers_->device_nTotalQuints_buf); + alpaka::wait(queue_); // wait for the values before using them auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); - if (!quintupletsInGPU) { - quintupletsInGPU.emplace(); - quintupletsBuffers.emplace(nTotalQuintuplets, nLowerModules_, devAcc, queue); - quintupletsInGPU->setData(*quintupletsBuffers); + if (!quintupletsInGPU_) { + quintupletsInGPU_.emplace(); + quintupletsBuffers_.emplace(nTotalQuintuplets, nLowerModules_, devAcc_, queue_); + quintupletsInGPU_->setData(*quintupletsBuffers_); - alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); + alpaka::memcpy(queue_, quintupletsBuffers_->nMemoryLocations_buf, nTotalQuintuplets_buf); } Vec3D const threadsPerBlockQuints{1, 8, 32}; @@ -765,15 +765,15 @@ void Event::createQuintuplets() { WorkDiv3D const createQuintupletsInGPUv2_workDiv = createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, createQuintupletsInGPUv2_workDiv, CreateQuintupletsInGPUv2{}, *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *quintupletsInGPU, - *rangesInGPU, + *mdsInGPU_, + *segmentsInGPU_, + *tripletsInGPU_, + *quintupletsInGPU_, + *rangesInGPU_, nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; @@ -781,23 +781,23 @@ void Event::createQuintuplets() { WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, removeDupQuintupletsInGPUAfterBuild_workDiv, RemoveDupQuintupletsInGPUAfterBuild{}, *modulesBuffers_.data(), - *quintupletsInGPU, - *rangesInGPU); + *quintupletsInGPU_, + *rangesInGPU_); WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, addQuintupletRangesToEventExplicit_workDiv, AddQuintupletRangesToEventExplicit{}, *modulesBuffers_.data(), - *quintupletsInGPU, - *rangesInGPU); + *quintupletsInGPU_, + *rangesInGPU_); - if (addObjects) { + if (addObjects_) { addQuintupletsToEventExplicit(); } } @@ -809,43 +809,43 @@ void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec(queue, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, false); + alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, false); } } void Event::createPixelQuintuplets() { - if (!pixelQuintupletsInGPU) { - pixelQuintupletsInGPU.emplace(); - pixelQuintupletsBuffers.emplace(n_max_pixel_quintuplets, devAcc, queue); - pixelQuintupletsInGPU->setData(*pixelQuintupletsBuffers); + if (!pixelQuintupletsInGPU_) { + pixelQuintupletsInGPU_.emplace(); + pixelQuintupletsBuffers_.emplace(n_max_pixel_quintuplets, devAcc_, queue_); + pixelQuintupletsInGPU_->setData(*pixelQuintupletsBuffers_); } - if (!trackCandidatesInGPU) { - trackCandidatesInGPU.emplace(); - trackCandidatesBuffers.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); - trackCandidatesInGPU->setData(*trackCandidatesBuffers); + if (!trackCandidatesInGPU_) { + trackCandidatesInGPU_.emplace(); + trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); + trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); } - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); + auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); + alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); auto const* superbins = superbins_buf.data(); auto const* pixelTypes = pixelTypes_buf.data(); unsigned int nInnerSegments; - auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); + auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); // Create a sub-view for the device buffer - auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); + auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); - alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_); + auto connectedPixelIndex_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_); auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); @@ -892,74 +892,74 @@ void Event::createPixelQuintuplets() { } } - alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); - alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, createPixelQuintupletsInGPUFromMapv2_workDiv, CreatePixelQuintupletsInGPUFromMapv2{}, *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *quintupletsInGPU, - *pixelQuintupletsInGPU, + *mdsInGPU_, + *segmentsInGPU_, + *tripletsInGPU_, + *quintupletsInGPU_, + *pixelQuintupletsInGPU_, connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), nInnerSegments, - *rangesInGPU); + *rangesInGPU_); Vec3D const threadsPerBlockDupPix{1, 16, 16}; Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); - alpaka::exec(queue, + alpaka::exec(queue_, removeDupPixelQuintupletsInGPUFromMap_workDiv, RemoveDupPixelQuintupletsInGPUFromMap{}, - *pixelQuintupletsInGPU); + *pixelQuintupletsInGPU_); WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); - alpaka::exec(queue, + alpaka::exec(queue_, addpT5asTrackCandidateInGPU_workDiv, AddpT5asTrackCandidateInGPU{}, nLowerModules_, - *pixelQuintupletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *rangesInGPU); + *pixelQuintupletsInGPU_, + *trackCandidatesInGPU_, + *segmentsInGPU_, + *rangesInGPU_); #ifdef WARNINGS - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelQuintuplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); - alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); // wait to get the value before using it + alpaka::memcpy(queue_, nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::wait(queue_); // wait to get the value before using it std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl; #endif } void Event::addMiniDoubletsToEventExplicit() { - auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); + auto nMDsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, nMDsCPU_buf, miniDoubletsBuffers_->nMDs_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); + auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); - alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); + auto module_hitRanges_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_ * 2, queue_); + alpaka::memcpy(queue_, module_hitRanges_buf, hitsBuffers_->hitRanges_buf, nLowerModules_ * 2u); - alpaka::wait(queue); // wait for inputs before using them + alpaka::wait(queue_); // wait for inputs before using them auto const* nMDsCPU = nMDsCPU_buf.data(); auto const* module_subdets = module_subdets_buf.data(); @@ -978,17 +978,17 @@ void Event::addMiniDoubletsToEventExplicit() { } void Event::addSegmentsToEventExplicit() { - auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); + auto nSegmentsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, nSegmentsCPU_buf, segmentsBuffers_->nSegments_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); + auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); // wait for inputs before using them + alpaka::wait(queue_); // wait for inputs before using them auto const* nSegmentsCPU = nSegmentsCPU_buf.data(); auto const* module_subdets = module_subdets_buf.data(); @@ -1006,20 +1006,20 @@ void Event::addSegmentsToEventExplicit() { } void Event::addQuintupletsToEventExplicit() { - auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); + auto nQuintupletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, nQuintupletsCPU_buf, quintupletsBuffers_->nQuintuplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); + auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nModules_, queue_); + alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); + auto module_quintupletModuleIndices_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, rangesBuffers_->quintupletModuleIndices_buf); - alpaka::wait(queue); // wait for inputs before using them + alpaka::wait(queue_); // wait for inputs before using them auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data(); auto const* module_subdets = module_subdets_buf.data(); @@ -1038,17 +1038,17 @@ void Event::addQuintupletsToEventExplicit() { } void Event::addTripletsToEventExplicit() { - auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); + auto nTripletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, nTripletsCPU_buf, tripletsBuffers_->nTriplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); + auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); + auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_); + alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); // wait for inputs before using them + alpaka::wait(queue_); // wait for inputs before using them auto const* nTripletsCPU = nTripletsCPU_buf.data(); auto const* module_subdets = module_subdets_buf.data(); @@ -1162,19 +1162,19 @@ unsigned int Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { retur unsigned int Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { return n_triplets_by_layer_endcap_[layer]; } int Event::getNumberOfPixelTriplets() { - auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::wait(queue_); return *nPixelTriplets_buf_h.data(); } int Event::getNumberOfPixelQuintuplets() { - auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::wait(queue_); return *nPixelQuintuplets_buf_h.data(); } @@ -1207,403 +1207,417 @@ unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { } int Event::getNumberOfTrackCandidates() { - auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + alpaka::wait(queue_); return *nTrackCandidates_buf_h.data(); } int Event::getNumberOfPT5TrackCandidates() { - auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers_->nTrackCandidatespT5_buf); + alpaka::wait(queue_); return *nTrackCandidatesPT5_buf_h.data(); } int Event::getNumberOfPT3TrackCandidates() { - auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers_->nTrackCandidatespT3_buf); + alpaka::wait(queue_); return *nTrackCandidatesPT3_buf_h.data(); } int Event::getNumberOfPLSTrackCandidates() { - auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers_->nTrackCandidatespLS_buf); + alpaka::wait(queue_); return *nTrackCandidatesPLS_buf_h.data(); } int Event::getNumberOfPixelTrackCandidates() { - auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::wait(queue_); return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } int Event::getNumberOfT5TrackCandidates() { - auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); - alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::wait(queue_); return *nTrackCandidatesT5_buf_h.data(); } HitsBuffer& Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { - if (!hitsInCPU) { - auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); - alpaka::wait(queue); // wait for the value before using + if (!hitsInCPU_) { + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nHits_buf_h, hitsBuffers_->nHits_buf); + alpaka::wait(queue_); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU.emplace(nModules_, nHits, devHost, queue); - hitsInCPU->setData(*hitsInCPU); - - alpaka::memcpy(queue, hitsInCPU->nHits_buf, hitsBuffers->nHits_buf); - alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); - alpaka::memcpy(queue, hitsInCPU->detid_buf, hitsBuffers->detid_buf, nHits); - alpaka::memcpy(queue, hitsInCPU->xs_buf, hitsBuffers->xs_buf, nHits); - alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); - alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); - alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); + hitsInCPU_.emplace(nModules_, nHits, cms::alpakatools::host(), queue_); + hitsInCPU_->setData(*hitsInCPU_); + + alpaka::memcpy(queue_, hitsInCPU_->nHits_buf, hitsBuffers_->nHits_buf); + alpaka::memcpy(queue_, hitsInCPU_->idxs_buf, hitsBuffers_->idxs_buf, nHits); + alpaka::memcpy(queue_, hitsInCPU_->detid_buf, hitsBuffers_->detid_buf, nHits); + alpaka::memcpy(queue_, hitsInCPU_->xs_buf, hitsBuffers_->xs_buf, nHits); + alpaka::memcpy(queue_, hitsInCPU_->ys_buf, hitsBuffers_->ys_buf, nHits); + alpaka::memcpy(queue_, hitsInCPU_->zs_buf, hitsBuffers_->zs_buf, nHits); + alpaka::memcpy(queue_, hitsInCPU_->moduleIndices_buf, hitsBuffers_->moduleIndices_buf, nHits); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return hitsInCPU.value(); + return hitsInCPU_.value(); } HitsBuffer& Event::getHitsInCMSSW(bool sync) { - if (!hitsInCPU) { - auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); - alpaka::wait(queue); // wait for the value before using + if (!hitsInCPU_) { + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nHits_buf_h, hitsBuffers_->nHits_buf); + alpaka::wait(queue_); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU.emplace(nModules_, nHits, devHost, queue); - hitsInCPU->setData(*hitsInCPU); + hitsInCPU_.emplace(nModules_, nHits, cms::alpakatools::host(), queue_); + hitsInCPU_->setData(*hitsInCPU_); - alpaka::memcpy(queue, hitsInCPU->nHits_buf, hitsBuffers->nHits_buf); - alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); + alpaka::memcpy(queue_, hitsInCPU_->nHits_buf, hitsBuffers_->nHits_buf); + alpaka::memcpy(queue_, hitsInCPU_->idxs_buf, hitsBuffers_->idxs_buf, nHits); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return hitsInCPU.value(); + return hitsInCPU_.value(); } ObjectRangesBuffer& Event::getRanges(bool sync) { - if (!rangesInCPU) { - rangesInCPU.emplace(nModules_, nLowerModules_, devHost, queue); - rangesInCPU->setData(*rangesInCPU); - - alpaka::memcpy(queue, rangesInCPU->hitRanges_buf, rangesBuffers->hitRanges_buf); - alpaka::memcpy(queue, rangesInCPU->quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); - alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); - alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); - alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); + if (!rangesInCPU_) { + rangesInCPU_.emplace(nModules_, nLowerModules_, cms::alpakatools::host(), queue_); + rangesInCPU_->setData(*rangesInCPU_); + + alpaka::memcpy(queue_, rangesInCPU_->hitRanges_buf, rangesBuffers_->hitRanges_buf); + alpaka::memcpy(queue_, rangesInCPU_->quintupletModuleIndices_buf, rangesBuffers_->quintupletModuleIndices_buf); + alpaka::memcpy(queue_, rangesInCPU_->miniDoubletModuleIndices_buf, rangesBuffers_->miniDoubletModuleIndices_buf); + alpaka::memcpy(queue_, rangesInCPU_->segmentModuleIndices_buf, rangesBuffers_->segmentModuleIndices_buf); + alpaka::memcpy(queue_, rangesInCPU_->tripletModuleIndices_buf, rangesBuffers_->tripletModuleIndices_buf); if (sync) - alpaka::wait(queue); // wait to get completed host data + alpaka::wait(queue_); // wait to get completed host data } - return rangesInCPU.value(); + return rangesInCPU_.value(); } MiniDoubletsBuffer& Event::getMiniDoublets(bool sync) { - if (!mdsInCPU) { - // Get nMemoryLocations parameter to initialize host based mdsInCPU - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nMemHost_buf_h, miniDoubletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); // wait for the value before using + if (!mdsInCPU_) { + // Get nMemoryLocations parameter to initialize host based mdsInCPU_ + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nMemHost_buf_h, miniDoubletsBuffers_->nMemoryLocations_buf); + alpaka::wait(queue_); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - mdsInCPU.emplace(nMemHost, nLowerModules_, devHost, queue); - mdsInCPU->setData(*mdsInCPU); - - alpaka::memcpy(queue, mdsInCPU->nMemoryLocations_buf, miniDoubletsBuffers->nMemoryLocations_buf); - alpaka::memcpy(queue, mdsInCPU->anchorHitIndices_buf, miniDoubletsBuffers->anchorHitIndices_buf, nMemHost); - alpaka::memcpy(queue, mdsInCPU->outerHitIndices_buf, miniDoubletsBuffers->outerHitIndices_buf, nMemHost); - alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); - alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); - alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); + mdsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); + mdsInCPU_->setData(*mdsInCPU_); + + alpaka::memcpy(queue_, mdsInCPU_->nMemoryLocations_buf, miniDoubletsBuffers_->nMemoryLocations_buf); + alpaka::memcpy(queue_, mdsInCPU_->anchorHitIndices_buf, miniDoubletsBuffers_->anchorHitIndices_buf, nMemHost); + alpaka::memcpy(queue_, mdsInCPU_->outerHitIndices_buf, miniDoubletsBuffers_->outerHitIndices_buf, nMemHost); + alpaka::memcpy(queue_, mdsInCPU_->dphichanges_buf, miniDoubletsBuffers_->dphichanges_buf, nMemHost); + alpaka::memcpy(queue_, mdsInCPU_->nMDs_buf, miniDoubletsBuffers_->nMDs_buf); + alpaka::memcpy(queue_, mdsInCPU_->totOccupancyMDs_buf, miniDoubletsBuffers_->totOccupancyMDs_buf); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return mdsInCPU.value(); + return mdsInCPU_.value(); } SegmentsBuffer& Event::getSegments(bool sync) { - if (!segmentsInCPU) { - // Get nMemoryLocations parameter to initialize host based segmentsInCPU - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nMemHost_buf_h, segmentsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); // wait for the value before using + if (!segmentsInCPU_) { + // Get nMemoryLocations parameter to initialize host based segmentsInCPU_ + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nMemHost_buf_h, segmentsBuffers_->nMemoryLocations_buf); + alpaka::wait(queue_); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU.emplace(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); - segmentsInCPU->setData(*segmentsInCPU); - - alpaka::memcpy(queue, segmentsInCPU->nMemoryLocations_buf, segmentsBuffers->nMemoryLocations_buf); - alpaka::memcpy(queue, segmentsInCPU->nSegments_buf, segmentsBuffers->nSegments_buf); - alpaka::memcpy(queue, segmentsInCPU->mdIndices_buf, segmentsBuffers->mdIndices_buf, 2u * nMemHost); - alpaka::memcpy(queue, - segmentsInCPU->innerMiniDoubletAnchorHitIndices_buf, - segmentsBuffers->innerMiniDoubletAnchorHitIndices_buf, + segmentsInCPU_.emplace(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, cms::alpakatools::host(), queue_); + segmentsInCPU_->setData(*segmentsInCPU_); + + alpaka::memcpy(queue_, segmentsInCPU_->nMemoryLocations_buf, segmentsBuffers_->nMemoryLocations_buf); + alpaka::memcpy(queue_, segmentsInCPU_->nSegments_buf, segmentsBuffers_->nSegments_buf); + alpaka::memcpy(queue_, segmentsInCPU_->mdIndices_buf, segmentsBuffers_->mdIndices_buf, 2u * nMemHost); + alpaka::memcpy(queue_, + segmentsInCPU_->innerMiniDoubletAnchorHitIndices_buf, + segmentsBuffers_->innerMiniDoubletAnchorHitIndices_buf, nMemHost); - alpaka::memcpy(queue, - segmentsInCPU->outerMiniDoubletAnchorHitIndices_buf, - segmentsBuffers->outerMiniDoubletAnchorHitIndices_buf, + alpaka::memcpy(queue_, + segmentsInCPU_->outerMiniDoubletAnchorHitIndices_buf, + segmentsBuffers_->outerMiniDoubletAnchorHitIndices_buf, nMemHost); - alpaka::memcpy(queue, segmentsInCPU->totOccupancySegments_buf, segmentsBuffers->totOccupancySegments_buf); - alpaka::memcpy(queue, segmentsInCPU->ptIn_buf, segmentsBuffers->ptIn_buf); - alpaka::memcpy(queue, segmentsInCPU->eta_buf, segmentsBuffers->eta_buf); - alpaka::memcpy(queue, segmentsInCPU->phi_buf, segmentsBuffers->phi_buf); - alpaka::memcpy(queue, segmentsInCPU->seedIdx_buf, segmentsBuffers->seedIdx_buf); - alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); - alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); - alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf); + alpaka::memcpy(queue_, segmentsInCPU_->totOccupancySegments_buf, segmentsBuffers_->totOccupancySegments_buf); + alpaka::memcpy(queue_, segmentsInCPU_->ptIn_buf, segmentsBuffers_->ptIn_buf); + alpaka::memcpy(queue_, segmentsInCPU_->eta_buf, segmentsBuffers_->eta_buf); + alpaka::memcpy(queue_, segmentsInCPU_->phi_buf, segmentsBuffers_->phi_buf); + alpaka::memcpy(queue_, segmentsInCPU_->seedIdx_buf, segmentsBuffers_->seedIdx_buf); + alpaka::memcpy(queue_, segmentsInCPU_->isDup_buf, segmentsBuffers_->isDup_buf); + alpaka::memcpy(queue_, segmentsInCPU_->isQuad_buf, segmentsBuffers_->isQuad_buf); + alpaka::memcpy(queue_, segmentsInCPU_->score_buf, segmentsBuffers_->score_buf); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return segmentsInCPU.value(); + return segmentsInCPU_.value(); } TripletsBuffer& Event::getTriplets(bool sync) { - if (!tripletsInCPU) { - // Get nMemoryLocations parameter to initialize host based tripletsInCPU - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nMemHost_buf_h, tripletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); // wait for the value before using + if (!tripletsInCPU_) { + // Get nMemoryLocations parameter to initialize host based tripletsInCPU_ + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nMemHost_buf_h, tripletsBuffers_->nMemoryLocations_buf); + alpaka::wait(queue_); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU.emplace(nMemHost, nLowerModules_, devHost, queue); - tripletsInCPU->setData(*tripletsInCPU); + tripletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); + tripletsInCPU_->setData(*tripletsInCPU_); - alpaka::memcpy(queue, tripletsInCPU->nMemoryLocations_buf, tripletsBuffers->nMemoryLocations_buf); + alpaka::memcpy(queue_, tripletsInCPU_->nMemoryLocations_buf, tripletsBuffers_->nMemoryLocations_buf); #ifdef CUT_VALUE_DEBUG - alpaka::memcpy(queue, tripletsInCPU->zOut_buf, tripletsBuffers->zOut_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->zLo_buf, tripletsBuffers->zLo_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->zHi_buf, tripletsBuffers->zHi_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->zLoPointed_buf, tripletsBuffers->zLoPointed_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->zHiPointed_buf, tripletsBuffers->zHiPointed_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->dPhiCut_buf, tripletsBuffers->dPhiCut_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->betaInCut_buf, tripletsBuffers->betaInCut_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->rtLo_buf, tripletsBuffers->rtLo_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->rtHi_buf, tripletsBuffers->rtHi_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->zOut_buf, tripletsBuffers_->zOut_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->zLo_buf, tripletsBuffers_->zLo_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->zHi_buf, tripletsBuffers_->zHi_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->zLoPointed_buf, tripletsBuffers_->zLoPointed_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->zHiPointed_buf, tripletsBuffers_->zHiPointed_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->dPhiCut_buf, tripletsBuffers_->dPhiCut_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->betaInCut_buf, tripletsBuffers_->betaInCut_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->rtLo_buf, tripletsBuffers_->rtLo_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->rtHi_buf, tripletsBuffers_->rtHi_buf, nMemHost); #endif - alpaka::memcpy(queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, Params_T3::kHits * nMemHost); alpaka::memcpy( - queue, tripletsInCPU->logicalLayers_buf, tripletsBuffers->logicalLayers_buf, Params_T3::kLayers * nMemHost); - alpaka::memcpy(queue, tripletsInCPU->segmentIndices_buf, tripletsBuffers->segmentIndices_buf, 2 * nMemHost); - alpaka::memcpy(queue, tripletsInCPU->betaIn_buf, tripletsBuffers->betaIn_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); - alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); - alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); + queue_, tripletsInCPU_->hitIndices_buf, tripletsBuffers_->hitIndices_buf, Params_T3::kHits * nMemHost); + alpaka::memcpy( + queue_, tripletsInCPU_->logicalLayers_buf, tripletsBuffers_->logicalLayers_buf, Params_T3::kLayers * nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->segmentIndices_buf, tripletsBuffers_->segmentIndices_buf, 2 * nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->betaIn_buf, tripletsBuffers_->betaIn_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->circleRadius_buf, tripletsBuffers_->circleRadius_buf, nMemHost); + alpaka::memcpy(queue_, tripletsInCPU_->nTriplets_buf, tripletsBuffers_->nTriplets_buf); + alpaka::memcpy(queue_, tripletsInCPU_->totOccupancyTriplets_buf, tripletsBuffers_->totOccupancyTriplets_buf); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return tripletsInCPU.value(); + return tripletsInCPU_.value(); } QuintupletsBuffer& Event::getQuintuplets(bool sync) { - if (!quintupletsInCPU) { - // Get nMemoryLocations parameter to initialize host based quintupletsInCPU - auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nMemHost_buf_h, quintupletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); // wait for the value before using + if (!quintupletsInCPU_) { + // Get nMemoryLocations parameter to initialize host based quintupletsInCPU_ + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nMemHost_buf_h, quintupletsBuffers_->nMemoryLocations_buf); + alpaka::wait(queue_); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU.emplace(nMemHost, nLowerModules_, devHost, queue); - quintupletsInCPU->setData(*quintupletsInCPU); + quintupletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_); + quintupletsInCPU_->setData(*quintupletsInCPU_); - alpaka::memcpy(queue, quintupletsInCPU->nMemoryLocations_buf, quintupletsBuffers->nMemoryLocations_buf); - alpaka::memcpy(queue, quintupletsInCPU->nQuintuplets_buf, quintupletsBuffers->nQuintuplets_buf); + alpaka::memcpy(queue_, quintupletsInCPU_->nMemoryLocations_buf, quintupletsBuffers_->nMemoryLocations_buf); + alpaka::memcpy(queue_, quintupletsInCPU_->nQuintuplets_buf, quintupletsBuffers_->nQuintuplets_buf); + alpaka::memcpy( + queue_, quintupletsInCPU_->totOccupancyQuintuplets_buf, quintupletsBuffers_->totOccupancyQuintuplets_buf); alpaka::memcpy( - queue, quintupletsInCPU->totOccupancyQuintuplets_buf, quintupletsBuffers->totOccupancyQuintuplets_buf); - alpaka::memcpy(queue, quintupletsInCPU->tripletIndices_buf, quintupletsBuffers->tripletIndices_buf, 2 * nMemHost); - alpaka::memcpy(queue, - quintupletsInCPU->lowerModuleIndices_buf, - quintupletsBuffers->lowerModuleIndices_buf, + queue_, quintupletsInCPU_->tripletIndices_buf, quintupletsBuffers_->tripletIndices_buf, 2 * nMemHost); + alpaka::memcpy(queue_, + quintupletsInCPU_->lowerModuleIndices_buf, + quintupletsBuffers_->lowerModuleIndices_buf, Params_T5::kLayers * nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->innerRadius_buf, quintupletsBuffers->innerRadius_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->bridgeRadius_buf, quintupletsBuffers->bridgeRadius_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->outerRadius_buf, quintupletsBuffers->outerRadius_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->isDup_buf, quintupletsBuffers->isDup_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->score_rphisum_buf, quintupletsBuffers->score_rphisum_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->eta_buf, quintupletsBuffers->eta_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->phi_buf, quintupletsBuffers->phi_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->chiSquared_buf, quintupletsBuffers->chiSquared_buf, nMemHost); - alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->innerRadius_buf, quintupletsBuffers_->innerRadius_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->bridgeRadius_buf, quintupletsBuffers_->bridgeRadius_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->outerRadius_buf, quintupletsBuffers_->outerRadius_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->isDup_buf, quintupletsBuffers_->isDup_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->score_rphisum_buf, quintupletsBuffers_->score_rphisum_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->eta_buf, quintupletsBuffers_->eta_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->phi_buf, quintupletsBuffers_->phi_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->chiSquared_buf, quintupletsBuffers_->chiSquared_buf, nMemHost); + alpaka::memcpy(queue_, quintupletsInCPU_->rzChiSquared_buf, quintupletsBuffers_->rzChiSquared_buf, nMemHost); alpaka::memcpy( - queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); + queue_, quintupletsInCPU_->nonAnchorChiSquared_buf, quintupletsBuffers_->nonAnchorChiSquared_buf, nMemHost); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return quintupletsInCPU.value(); + return quintupletsInCPU_.value(); } PixelTripletsBuffer& Event::getPixelTriplets(bool sync) { - if (!pixelTripletsInCPU) { - // Get nPixelTriplets parameter to initialize host based quintupletsInCPU - auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); // wait for the value before using + if (!pixelTripletsInCPU_) { + // Get nPixelTriplets parameter to initialize host based quintupletsInCPU_ + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::wait(queue_); // wait for the value before using auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU.emplace(nPixelTriplets, devHost, queue); - pixelTripletsInCPU->setData(*pixelTripletsInCPU); + pixelTripletsInCPU_.emplace(nPixelTriplets, cms::alpakatools::host(), queue_); + pixelTripletsInCPU_->setData(*pixelTripletsInCPU_); - alpaka::memcpy(queue, pixelTripletsInCPU->nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); + alpaka::memcpy(queue_, pixelTripletsInCPU_->nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::memcpy(queue_, + pixelTripletsInCPU_->totOccupancyPixelTriplets_buf, + pixelTripletsBuffers_->totOccupancyPixelTriplets_buf); alpaka::memcpy( - queue, pixelTripletsInCPU->totOccupancyPixelTriplets_buf, pixelTripletsBuffers->totOccupancyPixelTriplets_buf); - alpaka::memcpy(queue, pixelTripletsInCPU->rzChiSquared_buf, pixelTripletsBuffers->rzChiSquared_buf, nPixelTriplets); + queue_, pixelTripletsInCPU_->rzChiSquared_buf, pixelTripletsBuffers_->rzChiSquared_buf, nPixelTriplets); alpaka::memcpy( - queue, pixelTripletsInCPU->rPhiChiSquared_buf, pixelTripletsBuffers->rPhiChiSquared_buf, nPixelTriplets); - alpaka::memcpy(queue, - pixelTripletsInCPU->rPhiChiSquaredInwards_buf, - pixelTripletsBuffers->rPhiChiSquaredInwards_buf, + queue_, pixelTripletsInCPU_->rPhiChiSquared_buf, pixelTripletsBuffers_->rPhiChiSquared_buf, nPixelTriplets); + alpaka::memcpy(queue_, + pixelTripletsInCPU_->rPhiChiSquaredInwards_buf, + pixelTripletsBuffers_->rPhiChiSquaredInwards_buf, nPixelTriplets); alpaka::memcpy( - queue, pixelTripletsInCPU->tripletIndices_buf, pixelTripletsBuffers->tripletIndices_buf, nPixelTriplets); - alpaka::memcpy(queue, - pixelTripletsInCPU->pixelSegmentIndices_buf, - pixelTripletsBuffers->pixelSegmentIndices_buf, + queue_, pixelTripletsInCPU_->tripletIndices_buf, pixelTripletsBuffers_->tripletIndices_buf, nPixelTriplets); + alpaka::memcpy(queue_, + pixelTripletsInCPU_->pixelSegmentIndices_buf, + pixelTripletsBuffers_->pixelSegmentIndices_buf, nPixelTriplets); - alpaka::memcpy(queue, pixelTripletsInCPU->pixelRadius_buf, pixelTripletsBuffers->pixelRadius_buf, nPixelTriplets); alpaka::memcpy( - queue, pixelTripletsInCPU->tripletRadius_buf, pixelTripletsBuffers->tripletRadius_buf, nPixelTriplets); - alpaka::memcpy(queue, pixelTripletsInCPU->isDup_buf, pixelTripletsBuffers->isDup_buf, nPixelTriplets); - alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); - alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); - alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); + queue_, pixelTripletsInCPU_->pixelRadius_buf, pixelTripletsBuffers_->pixelRadius_buf, nPixelTriplets); + alpaka::memcpy( + queue_, pixelTripletsInCPU_->tripletRadius_buf, pixelTripletsBuffers_->tripletRadius_buf, nPixelTriplets); + alpaka::memcpy(queue_, pixelTripletsInCPU_->isDup_buf, pixelTripletsBuffers_->isDup_buf, nPixelTriplets); + alpaka::memcpy(queue_, pixelTripletsInCPU_->eta_buf, pixelTripletsBuffers_->eta_buf, nPixelTriplets); + alpaka::memcpy(queue_, pixelTripletsInCPU_->phi_buf, pixelTripletsBuffers_->phi_buf, nPixelTriplets); + alpaka::memcpy(queue_, pixelTripletsInCPU_->score_buf, pixelTripletsBuffers_->score_buf, nPixelTriplets); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return pixelTripletsInCPU.value(); + return pixelTripletsInCPU_.value(); } PixelQuintupletsBuffer& Event::getPixelQuintuplets(bool sync) { - if (!pixelQuintupletsInCPU) { - // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU - auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); // wait for the value before using + if (!pixelQuintupletsInCPU_) { + // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU_ + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::wait(queue_); // wait for the value before using auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU.emplace(nPixelQuintuplets, devHost, queue); - pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); + pixelQuintupletsInCPU_.emplace(nPixelQuintuplets, cms::alpakatools::host(), queue_); + pixelQuintupletsInCPU_->setData(*pixelQuintupletsInCPU_); - alpaka::memcpy(queue, pixelQuintupletsInCPU->nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::memcpy(queue, - pixelQuintupletsInCPU->totOccupancyPixelQuintuplets_buf, - pixelQuintupletsBuffers->totOccupancyPixelQuintuplets_buf); alpaka::memcpy( - queue, pixelQuintupletsInCPU->rzChiSquared_buf, pixelQuintupletsBuffers->rzChiSquared_buf, nPixelQuintuplets); - alpaka::memcpy(queue, - pixelQuintupletsInCPU->rPhiChiSquared_buf, - pixelQuintupletsBuffers->rPhiChiSquared_buf, + queue_, pixelQuintupletsInCPU_->nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf); + alpaka::memcpy(queue_, + pixelQuintupletsInCPU_->totOccupancyPixelQuintuplets_buf, + pixelQuintupletsBuffers_->totOccupancyPixelQuintuplets_buf); + alpaka::memcpy(queue_, + pixelQuintupletsInCPU_->rzChiSquared_buf, + pixelQuintupletsBuffers_->rzChiSquared_buf, nPixelQuintuplets); - alpaka::memcpy(queue, - pixelQuintupletsInCPU->rPhiChiSquaredInwards_buf, - pixelQuintupletsBuffers->rPhiChiSquaredInwards_buf, + alpaka::memcpy(queue_, + pixelQuintupletsInCPU_->rPhiChiSquared_buf, + pixelQuintupletsBuffers_->rPhiChiSquared_buf, + nPixelQuintuplets); + alpaka::memcpy(queue_, + pixelQuintupletsInCPU_->rPhiChiSquaredInwards_buf, + pixelQuintupletsBuffers_->rPhiChiSquaredInwards_buf, + nPixelQuintuplets); + alpaka::memcpy(queue_, + pixelQuintupletsInCPU_->pixelIndices_buf, + pixelQuintupletsBuffers_->pixelIndices_buf, nPixelQuintuplets); alpaka::memcpy( - queue, pixelQuintupletsInCPU->pixelIndices_buf, pixelQuintupletsBuffers->pixelIndices_buf, nPixelQuintuplets); - alpaka::memcpy( - queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); - alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); - alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); + queue_, pixelQuintupletsInCPU_->T5Indices_buf, pixelQuintupletsBuffers_->T5Indices_buf, nPixelQuintuplets); + alpaka::memcpy(queue_, pixelQuintupletsInCPU_->isDup_buf, pixelQuintupletsBuffers_->isDup_buf, nPixelQuintuplets); + alpaka::memcpy(queue_, pixelQuintupletsInCPU_->score_buf, pixelQuintupletsBuffers_->score_buf, nPixelQuintuplets); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return pixelQuintupletsInCPU.value(); + return pixelQuintupletsInCPU_.value(); } TrackCandidatesBuffer& Event::getTrackCandidates(bool sync) { - if (!trackCandidatesInCPU) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - trackCandidatesInCPU.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); - trackCandidatesInCPU->setData(*trackCandidatesInCPU); - alpaka::wait(queue); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable + if (!trackCandidatesInCPU_) { + // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + trackCandidatesInCPU_.emplace( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); + trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU_ becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue, - trackCandidatesInCPU->hitIndices_buf, - trackCandidatesBuffers->hitIndices_buf, + *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; + alpaka::memcpy(queue_, + trackCandidatesInCPU_->hitIndices_buf, + trackCandidatesBuffers_->hitIndices_buf, Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( - queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); - alpaka::memcpy(queue, - trackCandidatesInCPU->logicalLayers_buf, - trackCandidatesBuffers->logicalLayers_buf, + queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->logicalLayers_buf, + trackCandidatesBuffers_->logicalLayers_buf, Params_pT5::kLayers * nTrackCanHost); - alpaka::memcpy(queue, - trackCandidatesInCPU->directObjectIndices_buf, - trackCandidatesBuffers->directObjectIndices_buf, + alpaka::memcpy(queue_, + trackCandidatesInCPU_->directObjectIndices_buf, + trackCandidatesBuffers_->directObjectIndices_buf, nTrackCanHost); - alpaka::memcpy( - queue, trackCandidatesInCPU->objectIndices_buf, trackCandidatesBuffers->objectIndices_buf, 2 * nTrackCanHost); - alpaka::memcpy(queue, - trackCandidatesInCPU->trackCandidateType_buf, - trackCandidatesBuffers->trackCandidateType_buf, + alpaka::memcpy(queue_, + trackCandidatesInCPU_->objectIndices_buf, + trackCandidatesBuffers_->objectIndices_buf, + 2 * nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->trackCandidateType_buf, + trackCandidatesBuffers_->trackCandidateType_buf, nTrackCanHost); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU.value(); + return trackCandidatesInCPU_.value(); } TrackCandidatesBuffer& Event::getTrackCandidatesInCMSSW(bool sync) { - if (!trackCandidatesInCPU) { - // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - trackCandidatesInCPU.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); - trackCandidatesInCPU->setData(*trackCandidatesInCPU); - alpaka::wait(queue); // wait for the value before using and trackCandidatesInCPU becomes usable + if (!trackCandidatesInCPU_) { + // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + trackCandidatesInCPU_.emplace( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); + trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); + alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU_ becomes usable auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; - alpaka::memcpy(queue, - trackCandidatesInCPU->hitIndices_buf, - trackCandidatesBuffers->hitIndices_buf, + *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; + alpaka::memcpy(queue_, + trackCandidatesInCPU_->hitIndices_buf, + trackCandidatesBuffers_->hitIndices_buf, Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( - queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); - alpaka::memcpy(queue, - trackCandidatesInCPU->trackCandidateType_buf, - trackCandidatesBuffers->trackCandidateType_buf, + queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->trackCandidateType_buf, + trackCandidatesBuffers_->trackCandidateType_buf, nTrackCanHost); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return trackCandidatesInCPU.value(); + return trackCandidatesInCPU_.value(); } ModulesBuffer& Event::getModules(bool isFull, bool sync) { - if (!modulesInCPU) { + if (!modulesInCPU_) { // The last input here is just a small placeholder for the allocation. - modulesInCPU.emplace(devHost, nModules_, nPixels_); + modulesInCPU_.emplace(cms::alpakatools::host(), nModules_, nPixels_); - modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); + modulesInCPU_->copyFromSrc(queue_, modulesBuffers_, isFull); if (sync) - alpaka::wait(queue); // host consumers expect filled data + alpaka::wait(queue_); // host consumers expect filled data } - return modulesInCPU.value(); + return modulesInCPU_.value(); } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index ceaeea1de2ddc..2b09565cf4176 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -23,10 +23,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { class Event { private: - Queue queue; - Device devAcc; - DevHost devHost; - bool addObjects; + Queue& queue_; + Device devAcc_; + bool addObjects_; std::array n_hits_by_layer_barrel_; std::array n_hits_by_layer_endcap_; @@ -43,36 +42,36 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int nTotalSegments_; //Device stuff - std::optional rangesInGPU; - std::optional> rangesBuffers; - std::optional hitsInGPU; - std::optional> hitsBuffers; - std::optional mdsInGPU; - std::optional> miniDoubletsBuffers; - std::optional segmentsInGPU; - std::optional> segmentsBuffers; - std::optional tripletsInGPU; - std::optional> tripletsBuffers; - std::optional quintupletsInGPU; - std::optional> quintupletsBuffers; - std::optional trackCandidatesInGPU; - std::optional> trackCandidatesBuffers; - std::optional pixelTripletsInGPU; - std::optional> pixelTripletsBuffers; - std::optional pixelQuintupletsInGPU; - std::optional> pixelQuintupletsBuffers; + std::optional rangesInGPU_; + std::optional> rangesBuffers_; + std::optional hitsInGPU_; + std::optional> hitsBuffers_; + std::optional mdsInGPU_; + std::optional> miniDoubletsBuffers_; + std::optional segmentsInGPU_; + std::optional> segmentsBuffers_; + std::optional tripletsInGPU_; + std::optional> tripletsBuffers_; + std::optional quintupletsInGPU_; + std::optional> quintupletsBuffers_; + std::optional trackCandidatesInGPU_; + std::optional> trackCandidatesBuffers_; + std::optional pixelTripletsInGPU_; + std::optional> pixelTripletsBuffers_; + std::optional pixelQuintupletsInGPU_; + std::optional> pixelQuintupletsBuffers_; //CPU interface stuff - std::optional> rangesInCPU; - std::optional> hitsInCPU; - std::optional> mdsInCPU; - std::optional> segmentsInCPU; - std::optional> tripletsInCPU; - std::optional> trackCandidatesInCPU; - std::optional> modulesInCPU; - std::optional> quintupletsInCPU; - std::optional> pixelTripletsInCPU; - std::optional> pixelQuintupletsInCPU; + std::optional> rangesInCPU_; + std::optional> hitsInCPU_; + std::optional> mdsInCPU_; + std::optional> segmentsInCPU_; + std::optional> tripletsInCPU_; + std::optional> trackCandidatesInCPU_; + std::optional> modulesInCPU_; + std::optional> quintupletsInCPU_; + std::optional> pixelTripletsInCPU_; + std::optional> pixelQuintupletsInCPU_; void initSync(bool verbose); @@ -86,10 +85,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { public: // Constructor used for CMSSW integration. Uses an external queue. - Event(bool verbose, Queue const& q, const LSTESData* deviceESData) - : queue(q), - devAcc(alpaka::getDev(q)), - devHost(cms::alpakatools::host()), + Event(bool verbose, Queue& q, const LSTESData* deviceESData) + : queue_(q), + devAcc_(alpaka::getDev(q)), nModules_(deviceESData->nModules), nLowerModules_(deviceESData->nLowerModules), nPixels_(deviceESData->nPixels), @@ -100,7 +98,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { initSync(verbose); } void resetEventSync(); // synchronizes - void wait() const { alpaka::wait(queue); } + void wait() const { alpaka::wait(queue_); } // Calls the appropriate hit function, then increments the counter void addHitToEvent(std::vector const& x,