From 0bcb06cc65252f0fad82bec228c72051d78d9209 Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Wed, 31 Jul 2024 14:00:24 -0700 Subject: [PATCH 01/20] Removed alpaka functions from host ES code --- RecoTracker/LSTCore/interface/LSTESData.h | 22 +-- RecoTracker/LSTCore/src/LSTESData.cc | 44 ++--- RecoTracker/LSTCore/src/ModuleMethods.h | 191 ++++++-------------- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 72 ++++---- RecoTracker/LSTCore/src/alpaka/Event.h | 6 +- 5 files changed, 128 insertions(+), 207 deletions(-) diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h index 833770e631d2d..321c2f371cc32 100644 --- a/RecoTracker/LSTCore/interface/LSTESData.h +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -19,16 +19,16 @@ namespace lst { uint16_t nLowerModules; unsigned int nPixels; unsigned int nEndCapMap; - std::shared_ptr> modulesBuffers; - std::shared_ptr> endcapGeometryBuffers; + ModulesBuffer modulesBuffers; + EndcapGeometryBuffer endcapGeometryBuffers; std::shared_ptr pixelMapping; LSTESData(uint16_t const& nModulesIn, uint16_t const& nLowerModulesIn, unsigned int const& nPixelsIn, unsigned int const& nEndCapMapIn, - std::shared_ptr> const& modulesBuffersIn, - std::shared_ptr> const& endcapGeometryBuffersIn, + ModulesBuffer const& modulesBuffersIn, + EndcapGeometryBuffer const& endcapGeometryBuffersIn, std::shared_ptr const& pixelMappingIn) : nModules(nModulesIn), nLowerModules(nLowerModulesIn), @@ -49,19 +49,17 @@ namespace cms::alpakatools { template static lst::LSTESData> copyAsync(TQueue& queue, lst::LSTESData const& srcData) { - auto deviceModulesBuffers = std::make_shared>>( - alpaka::getDev(queue), srcData.nModules, srcData.nPixels); - deviceModulesBuffers->copyFromSrc(queue, *srcData.modulesBuffers); - auto deviceEndcapGeometryBuffers = - std::make_shared>>(alpaka::getDev(queue), srcData.nEndCapMap); - deviceEndcapGeometryBuffers->copyFromSrc(queue, *srcData.endcapGeometryBuffers); + auto 
deviceModulesBuffers = lst::ModulesBuffer>(alpaka::getDev(queue), srcData.nModules, srcData.nPixels); + deviceModulesBuffers.copyFromSrc(queue, srcData.modulesBuffers); + auto deviceEndcapGeometryBuffers = lst::EndcapGeometryBuffer>(alpaka::getDev(queue), srcData.nEndCapMap); + deviceEndcapGeometryBuffers.copyFromSrc(queue, srcData.endcapGeometryBuffers); return lst::LSTESData>(srcData.nModules, srcData.nLowerModules, srcData.nPixels, srcData.nEndCapMap, - deviceModulesBuffers, - deviceEndcapGeometryBuffers, + std::move(deviceModulesBuffers), + std::move(deviceEndcapGeometryBuffers), srcData.pixelMapping); } }; diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc index 482d97d34249c..01210d89a82f3 100644 --- a/RecoTracker/LSTCore/src/LSTESData.cc +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -39,9 +39,9 @@ namespace { } void loadMapsHost(lst::MapPLStoLayer& pLStoLayer, - std::shared_ptr endcapGeometry, - std::shared_ptr tiltedGeometry, - std::shared_ptr moduleConnectionMap) { + lst::EndcapGeometry& endcapGeometry, + lst::TiltedGeometry& tiltedGeometry, + lst::ModuleConnectionMap& moduleConnectionMap) { // Module orientation information (DrDz or phi angles) auto endcap_geom = get_absolute_path_after_check_file_exists(trackLooperDir() + "/data/OT800_IT615_pt0.8/endcap_orientation.bin"); @@ -51,9 +51,9 @@ namespace { auto mappath = get_absolute_path_after_check_file_exists( trackLooperDir() + "/data/OT800_IT615_pt0.8/module_connection_tracing_merged.bin"); - endcapGeometry->load(endcap_geom); - tiltedGeometry->load(tilted_geom); - moduleConnectionMap->load(mappath); + endcapGeometry.load(endcap_geom); + tiltedGeometry.load(tilted_geom); + moduleConnectionMap.load(mappath); auto pLSMapDir = trackLooperDir() + "/data/OT800_IT615_pt0.8/pixelmap/pLS_map"; const std::array connects{ @@ -80,34 +80,28 @@ std::unique_ptr> lst::loadAndFillESHost() uint16_t nModules; uint16_t nLowerModules; unsigned int nPixels; - std::shared_ptr> 
modulesBuffers = nullptr; - auto pLStoLayer = std::make_shared(); - auto endcapGeometry = std::make_shared(); - auto tiltedGeometry = std::make_shared(); + MapPLStoLayer pLStoLayer; + EndcapGeometry endcapGeometry; + TiltedGeometry tiltedGeometry; auto pixelMapping = std::make_shared(); - auto moduleConnectionMap = std::make_shared(); - ::loadMapsHost(*pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); + ModuleConnectionMap moduleConnectionMap; + ::loadMapsHost(pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); - auto endcapGeometryBuffers = std::make_shared>( - cms::alpakatools::host(), endcapGeometry->nEndCapMap); - alpaka::QueueCpuBlocking queue(cms::alpakatools::host()); - alpaka::memcpy( - queue, endcapGeometryBuffers->geoMapDetId_buf, endcapGeometry->geoMapDetId_buf, endcapGeometry->nEndCapMap); - alpaka::memcpy( - queue, endcapGeometryBuffers->geoMapPhi_buf, endcapGeometry->geoMapPhi_buf, endcapGeometry->nEndCapMap); + auto endcapGeometryBuffers = EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); + std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapDetId_buf), endcapGeometry.geoMapDetId_buf.data(), endcapGeometry.nEndCapMap * sizeof(*endcapGeometry.geoMapDetId_buf.data())); + std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapPhi_buf), endcapGeometry.geoMapPhi_buf.data(), endcapGeometry.nEndCapMap * sizeof(*endcapGeometry.geoMapPhi_buf.data())); auto path = get_absolute_path_after_check_file_exists(trackLooperDir() + "/data/OT800_IT615_pt0.8/sensor_centroids.bin"); - lst::loadModulesFromFile(pLStoLayer.get(), + auto modulesBuffers = lst::loadModulesFromFile(pLStoLayer, path.c_str(), nModules, nLowerModules, nPixels, - modulesBuffers, pixelMapping.get(), - endcapGeometry.get(), - tiltedGeometry.get(), - moduleConnectionMap.get()); + endcapGeometry, + tiltedGeometry, + moduleConnectionMap); return std::make_unique>( - nModules, nLowerModules, nPixels, 
endcapGeometry.nEndCapMap, std::move(modulesBuffers), std::move(endcapGeometryBuffers), pixelMapping); } diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index 9693a464fcf1a..f4b74917bec13 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -23,15 +23,13 @@ namespace lst { // https://github.com/cms-sw/cmssw/blob/5e809e8e0a625578aa265dc4b128a93830cb5429/Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h#L29 }; - template - inline void fillPixelMap(std::shared_ptr>& modulesBuf, + inline void fillPixelMap(ModulesBuffer& modulesBuf, uint16_t nModules, unsigned int& nPixels, PixelMap& pixelMapping, - TQueue queue, - const MapPLStoLayer& pLStoLayer, - ModuleMetaData& mmd) { - pixelMapping.pixelModuleIndex = mmd.detIdToIndex[1]; + MapPLStoLayer const& pLStoLayer, + ModuleMetaData const& mmd) { + pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); std::vector connectedModuleDetIds; std::vector connectedModuleDetIds_pos; @@ -81,68 +79,44 @@ namespace lst { unsigned int connectedPix_size = totalSizes + totalSizes_pos + totalSizes_neg; nPixels = connectedPix_size; - // Now we can initialize modulesBuf - alpaka_common::DevHost const& devHost = cms::alpakatools::host(); - if (modulesBuf == nullptr) { - modulesBuf = std::make_shared>(devHost, nModules, nPixels); - } + // Now we re-initialize connectedPixels_buf since nPixels is now known + modulesBuf.connectedPixels_buf = allocBufWrapper(cms::alpakatools::host(), nPixels); + modulesBuf.data_.setData(modulesBuf); - auto connectedPixels_buf = allocBufWrapper(devHost, connectedPix_size); - unsigned int* connectedPixels = alpaka::getPtrNative(connectedPixels_buf); + unsigned int* connectedPixels = alpaka::getPtrNative(modulesBuf.connectedPixels_buf); for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { - connectedPixels[icondet] = mmd.detIdToIndex[connectedModuleDetIds[icondet]]; + connectedPixels[icondet] = 
mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); } for (unsigned int icondet = 0; icondet < totalSizes_pos; icondet++) { - connectedPixels[icondet + totalSizes] = mmd.detIdToIndex[connectedModuleDetIds_pos[icondet]]; + connectedPixels[icondet + totalSizes] = mmd.detIdToIndex.at(connectedModuleDetIds_pos[icondet]); } for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { - connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex[connectedModuleDetIds_neg[icondet]]; + connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); } - - alpaka::memcpy(queue, modulesBuf->connectedPixels_buf, connectedPixels_buf); - alpaka::wait(queue); }; - template - inline void fillConnectedModuleArrayExplicit(ModulesBuffer* modulesBuf, - unsigned int nMod, - TQueue queue, - ModuleMetaData& mmd, - const ModuleConnectionMap* moduleConnectionMap) { - alpaka_common::DevHost const& devHost = cms::alpakatools::host(); - auto moduleMap_buf = allocBufWrapper(devHost, nMod * max_connected_modules); - uint16_t* moduleMap = alpaka::getPtrNative(moduleMap_buf); - - auto nConnectedModules_buf = allocBufWrapper(devHost, nMod); - uint16_t* nConnectedModules = alpaka::getPtrNative(nConnectedModules_buf); + inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, + ModuleMetaData const& mmd, + ModuleConnectionMap const& moduleConnectionMap) { + uint16_t* moduleMap = alpaka::getPtrNative(modulesBuf.moduleMap_buf); + uint16_t* nConnectedModules = alpaka::getPtrNative(modulesBuf.nConnectedModules_buf); for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { unsigned int detId = it->first; uint16_t index = it->second; - auto& connectedModules = moduleConnectionMap->getConnectedModuleDetIds(detId); + auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); nConnectedModules[index] = connectedModules.size(); for (uint16_t i = 0; i < nConnectedModules[index]; 
i++) { - moduleMap[index * max_connected_modules + i] = mmd.detIdToIndex[connectedModules[i]]; + moduleMap[index * max_connected_modules + i] = mmd.detIdToIndex.at(connectedModules[i]); } } - - alpaka::memcpy(queue, modulesBuf->moduleMap_buf, moduleMap_buf); - alpaka::memcpy(queue, modulesBuf->nConnectedModules_buf, nConnectedModules_buf); - alpaka::wait(queue); }; - template - inline void fillMapArraysExplicit(ModulesBuffer* modulesBuf, - unsigned int nMod, - TQueue queue, - ModuleMetaData& mmd) { - alpaka_common::DevHost const& devHost = cms::alpakatools::host(); - auto mapIdx_buf = allocBufWrapper(devHost, nMod); - uint16_t* mapIdx = alpaka::getPtrNative(mapIdx_buf); - - auto mapdetId_buf = allocBufWrapper(devHost, nMod); - unsigned int* mapdetId = alpaka::getPtrNative(mapdetId_buf); + inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, + ModuleMetaData const& mmd) { + uint16_t* mapIdx = alpaka::getPtrNative(modulesBuf.mapIdx_buf); + unsigned int* mapdetId = alpaka::getPtrNative(modulesBuf.mapdetId_buf); unsigned int counter = 0; for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { @@ -152,10 +126,6 @@ namespace lst { mapdetId[counter] = detId; counter++; } - - alpaka::memcpy(queue, modulesBuf->mapIdx_buf, mapIdx_buf); - alpaka::memcpy(queue, modulesBuf->mapdetId_buf, mapdetId_buf); - alpaka::wait(queue); }; inline void setDerivedQuantities(unsigned int detId, @@ -218,59 +188,44 @@ namespace lst { nModules = counter; }; - inline void loadModulesFromFile(const MapPLStoLayer* pLStoLayer, + inline ModulesBuffer loadModulesFromFile(MapPLStoLayer const& pLStoLayer, const char* moduleMetaDataFilePath, uint16_t& nModules, uint16_t& nLowerModules, unsigned int& nPixels, - std::shared_ptr>& modulesBuf, PixelMap* pixelMapping, - const EndcapGeometry* endcapGeometry, - const TiltedGeometry* tiltedGeometry, - const ModuleConnectionMap* moduleConnectionMap) { + const EndcapGeometry& endcapGeometry, + const TiltedGeometry& tiltedGeometry, + 
const ModuleConnectionMap& moduleConnectionMap) { ModuleMetaData mmd; loadCentroidsFromFile(moduleMetaDataFilePath, mmd, nModules); - alpaka_common::DevHost const& devHost = cms::alpakatools::host(); - auto detIds_buf = allocBufWrapper(devHost, nModules); - auto layers_buf = allocBufWrapper(devHost, nModules); - auto rings_buf = allocBufWrapper(devHost, nModules); - auto rods_buf = allocBufWrapper(devHost, nModules); - auto modules_buf = allocBufWrapper(devHost, nModules); - auto subdets_buf = allocBufWrapper(devHost, nModules); - auto sides_buf = allocBufWrapper(devHost, nModules); - auto eta_buf = allocBufWrapper(devHost, nModules); - auto r_buf = allocBufWrapper(devHost, nModules); - auto isInverted_buf = allocBufWrapper(devHost, nModules); - auto isLower_buf = allocBufWrapper(devHost, nModules); - auto isAnchor_buf = allocBufWrapper(devHost, nModules); - auto moduleType_buf = allocBufWrapper(devHost, nModules); - auto moduleLayerType_buf = allocBufWrapper(devHost, nModules); - auto dxdys_buf = allocBufWrapper(devHost, nModules); - auto drdzs_buf = allocBufWrapper(devHost, nModules); - auto partnerModuleIndices_buf = allocBufWrapper(devHost, nModules); - auto lstLayers_buf = allocBufWrapper(devHost, nModules); + // Initialize modulesBuf, but with nPixels = 0 + // The fields that require nPixels are re-initialized in fillPixelMap + ModulesBuffer modulesBuf(cms::alpakatools::host(), nModules, 0); // Getting the underlying data pointers - unsigned int* host_detIds = alpaka::getPtrNative(detIds_buf); - short* host_layers = alpaka::getPtrNative(layers_buf); - short* host_rings = alpaka::getPtrNative(rings_buf); - short* host_rods = alpaka::getPtrNative(rods_buf); - short* host_modules = alpaka::getPtrNative(modules_buf); - short* host_subdets = alpaka::getPtrNative(subdets_buf); - short* host_sides = alpaka::getPtrNative(sides_buf); - float* host_eta = alpaka::getPtrNative(eta_buf); - float* host_r = alpaka::getPtrNative(r_buf); - bool* host_isInverted = 
alpaka::getPtrNative(isInverted_buf); - bool* host_isLower = alpaka::getPtrNative(isLower_buf); - bool* host_isAnchor = alpaka::getPtrNative(isAnchor_buf); - ModuleType* host_moduleType = alpaka::getPtrNative(moduleType_buf); - ModuleLayerType* host_moduleLayerType = alpaka::getPtrNative(moduleLayerType_buf); - float* host_dxdys = alpaka::getPtrNative(dxdys_buf); - float* host_drdzs = alpaka::getPtrNative(drdzs_buf); - uint16_t* host_partnerModuleIndices = alpaka::getPtrNative(partnerModuleIndices_buf); - int* host_lstLayers = alpaka::getPtrNative(lstLayers_buf); + unsigned int* host_detIds = alpaka::getPtrNative(modulesBuf.detIds_buf); + short* host_layers = alpaka::getPtrNative(modulesBuf.layers_buf); + short* host_rings = alpaka::getPtrNative(modulesBuf.rings_buf); + short* host_rods = alpaka::getPtrNative(modulesBuf.rods_buf); + short* host_modules = alpaka::getPtrNative(modulesBuf.modules_buf); + short* host_subdets = alpaka::getPtrNative(modulesBuf.subdets_buf); + short* host_sides = alpaka::getPtrNative(modulesBuf.sides_buf); + float* host_eta = alpaka::getPtrNative(modulesBuf.eta_buf); + float* host_r = alpaka::getPtrNative(modulesBuf.r_buf); + bool* host_isInverted = alpaka::getPtrNative(modulesBuf.isInverted_buf); + bool* host_isLower = alpaka::getPtrNative(modulesBuf.isLower_buf); + bool* host_isAnchor = alpaka::getPtrNative(modulesBuf.isAnchor_buf); + ModuleType* host_moduleType = alpaka::getPtrNative(modulesBuf.moduleType_buf); + ModuleLayerType* host_moduleLayerType = alpaka::getPtrNative(modulesBuf.moduleLayerType_buf); + float* host_dxdys = alpaka::getPtrNative(modulesBuf.dxdys_buf); + float* host_drdzs = alpaka::getPtrNative(modulesBuf.drdzs_buf); + uint16_t* host_nModules = alpaka::getPtrNative(modulesBuf.nModules_buf); + uint16_t* host_nLowerModules = alpaka::getPtrNative(modulesBuf.nLowerModules_buf); + uint16_t* host_partnerModuleIndices = alpaka::getPtrNative(modulesBuf.partnerModuleIndices_buf); + int* host_lstLayers = 
alpaka::getPtrNative(modulesBuf.lstLayers_buf); //reassign detIdToIndex indices here nLowerModules = (nModules - 1) / 2; @@ -347,8 +302,8 @@ namespace lst { host_isAnchor[index] = false; } - host_dxdys[index] = (subdet == Endcap) ? endcapGeometry->getdxdy_slope(detId) : tiltedGeometry->getDxDy(detId); - host_drdzs[index] = (subdet == Barrel) ? tiltedGeometry->getDrDz(detId) : 0; + host_dxdys[index] = (subdet == Endcap) ? endcapGeometry.getdxdy_slope(detId) : tiltedGeometry.getDxDy(detId); + host_drdzs[index] = (subdet == Barrel) ? tiltedGeometry.getDrDz(detId) : 0; } host_lstLayers[index] = @@ -372,41 +327,15 @@ namespace lst { } } - // TODO: We don't need a queue, but this code needs to be refactored - alpaka::QueueCpuBlocking queue(cms::alpakatools::host()); - - // modulesBuf is initialized in fillPixelMap since both nModules and nPix will be known - fillPixelMap(modulesBuf, nModules, nPixels, *pixelMapping, queue, *pLStoLayer, mmd); - - auto src_view_nModules = alpaka::createView(devHost, &nModules, (alpaka_common::Idx)1u); - alpaka::memcpy(queue, modulesBuf->nModules_buf, src_view_nModules); - - auto src_view_nLowerModules = alpaka::createView(devHost, &nLowerModules, (alpaka_common::Idx)1u); - alpaka::memcpy(queue, modulesBuf->nLowerModules_buf, src_view_nLowerModules); - - alpaka::memcpy(queue, modulesBuf->moduleType_buf, moduleType_buf); - alpaka::memcpy(queue, modulesBuf->moduleLayerType_buf, moduleLayerType_buf); - - alpaka::memcpy(queue, modulesBuf->detIds_buf, detIds_buf); - alpaka::memcpy(queue, modulesBuf->layers_buf, layers_buf); - alpaka::memcpy(queue, modulesBuf->rings_buf, rings_buf); - alpaka::memcpy(queue, modulesBuf->rods_buf, rods_buf); - alpaka::memcpy(queue, modulesBuf->modules_buf, modules_buf); - alpaka::memcpy(queue, modulesBuf->subdets_buf, subdets_buf); - alpaka::memcpy(queue, modulesBuf->sides_buf, sides_buf); - alpaka::memcpy(queue, modulesBuf->eta_buf, eta_buf); - alpaka::memcpy(queue, modulesBuf->r_buf, r_buf); - 
alpaka::memcpy(queue, modulesBuf->isInverted_buf, isInverted_buf); - alpaka::memcpy(queue, modulesBuf->isLower_buf, isLower_buf); - alpaka::memcpy(queue, modulesBuf->isAnchor_buf, isAnchor_buf); - alpaka::memcpy(queue, modulesBuf->dxdys_buf, dxdys_buf); - alpaka::memcpy(queue, modulesBuf->drdzs_buf, drdzs_buf); - alpaka::memcpy(queue, modulesBuf->partnerModuleIndices_buf, partnerModuleIndices_buf); - alpaka::memcpy(queue, modulesBuf->lstLayers_buf, lstLayers_buf); - alpaka::wait(queue); - - fillConnectedModuleArrayExplicit(modulesBuf.get(), nModules, queue, mmd, moduleConnectionMap); - fillMapArraysExplicit(modulesBuf.get(), nModules, queue, mmd); + fillPixelMap(modulesBuf, nModules, nPixels, *pixelMapping, pLStoLayer, mmd); + + *host_nModules = nModules; + *host_nLowerModules = nLowerModules; + + fillConnectedModuleArrayExplicit(modulesBuf, mmd, moduleConnectionMap); + fillMapArraysExplicit(modulesBuf, mmd); + + return modulesBuf; }; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 43f5bb7d9c3fe..6d4a2073d7e7e 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -195,9 +195,9 @@ void lst::Event::addHitToEvent(std::vector const& x, TwoS, nModules_, nEndCapMap_, - alpaka::getPtrNative(endcapGeometryBuffers_->geoMapDetId_buf), - alpaka::getPtrNative(endcapGeometryBuffers_->geoMapPhi_buf), - *modulesBuffers_->data(), + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), + *modulesBuffers_.data(), *hitsInGPU, nHits)); @@ -209,7 +209,7 @@ void lst::Event::addHitToEvent(std::vector const& x, moduleRangesKernel module_ranges_kernel; auto const module_ranges_task(alpaka::createTaskKernel( - module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_->data(), *hitsInGPU, nLowerModules_)); + module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), 
*hitsInGPU, nLowerModules_)); // Waiting isn't needed after second kernel call. Saves ~100 us. // This is because addPixelSegmentToEvent (which is run next) doesn't rely on hitsBuffers->hitrange variables. @@ -268,7 +268,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; auto const createMDArrayRangesGPUTask(alpaka::createTaskKernel( - createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_->data(), *rangesInGPU)); + createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); alpaka::wait(queue); @@ -300,7 +300,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; auto const createSegmentArrayRangesTask(alpaka::createTaskKernel(createSegmentArrayRanges_workDiv, createSegmentArrayRanges_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU)); @@ -376,7 +376,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& addPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; auto const addPixelSegmentToEvent_task(alpaka::createTaskKernel(addPixelSegmentToEvent_workdiv, addPixelSegmentToEvent_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *rangesInGPU, *hitsInGPU, *mdsInGPU, @@ -412,7 +412,7 @@ void lst::Event::createMiniDoublets() { lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; auto const createMDArrayRangesGPUTask(alpaka::createTaskKernel( - createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_->data(), *rangesInGPU)); + createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); alpaka::wait(queue); @@ -440,7 +440,7 @@ void lst::Event::createMiniDoublets() { lst::createMiniDoubletsInGPUv2 
createMiniDoubletsInGPUv2_kernel; auto const createMiniDoubletsInGPUv2Task(alpaka::createTaskKernel(createMiniDoubletsInGPUv2_workDiv, createMiniDoubletsInGPUv2_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *hitsInGPU, *mdsInGPU, *rangesInGPU)); @@ -456,7 +456,7 @@ void lst::Event::createMiniDoublets() { auto const addMiniDoubletRangesToEventExplicitTask( alpaka::createTaskKernel(addMiniDoubletRangesToEventExplicit_workDiv, addMiniDoubletRangesToEventExplicit_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *mdsInGPU, *rangesInGPU, *hitsInGPU)); @@ -485,7 +485,7 @@ void lst::Event::createSegmentsWithModuleMap() { lst::createSegmentsInGPUv2 createSegmentsInGPUv2_kernel; auto const createSegmentsInGPUv2Task(alpaka::createTaskKernel(createSegmentsInGPUv2_workDiv, createSegmentsInGPUv2_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *rangesInGPU)); @@ -501,7 +501,7 @@ void lst::Event::createSegmentsWithModuleMap() { auto const addSegmentRangesToEventExplicitTask( alpaka::createTaskKernel(addSegmentRangesToEventExplicit_workDiv, addSegmentRangesToEventExplicit_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *segmentsInGPU, *rangesInGPU)); @@ -523,7 +523,7 @@ void lst::Event::createTriplets() { lst::createTripletArrayRanges createTripletArrayRanges_kernel; auto const createTripletArrayRangesTask(alpaka::createTaskKernel(createTripletArrayRanges_workDiv, createTripletArrayRanges_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *rangesInGPU, *segmentsInGPU)); @@ -564,7 +564,7 @@ void lst::Event::createTriplets() { // Allocate and copy module_nConnectedModules from device to host auto module_nConnectedModules_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffers_->nConnectedModules_buf, nLowerModules_); + alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffers_.nConnectedModules_buf, 
nLowerModules_); alpaka::wait(queue); uint16_t* module_nConnectedModules = alpaka::getPtrNative(module_nConnectedModules_buf); @@ -591,7 +591,7 @@ void lst::Event::createTriplets() { lst::createTripletsInGPUv2 createTripletsInGPUv2_kernel; auto const createTripletsInGPUv2Task(alpaka::createTaskKernel(createTripletsInGPUv2_workDiv, createTripletsInGPUv2_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *tripletsInGPU, @@ -610,7 +610,7 @@ void lst::Event::createTriplets() { auto const addTripletRangesToEventExplicitTask( alpaka::createTaskKernel(addTripletRangesToEventExplicit_workDiv, addTripletRangesToEventExplicit_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU)); @@ -644,7 +644,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ lst::crossCleanpT3 crossCleanpT3_kernel; auto const crossCleanpT3Task(alpaka::createTaskKernel(crossCleanpT3_workDiv, crossCleanpT3_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, *segmentsInGPU, @@ -690,7 +690,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ lst::crossCleanT5 crossCleanT5_kernel; auto const crossCleanT5Task(alpaka::createTaskKernel(crossCleanT5_workDiv, crossCleanT5_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *quintupletsInGPU, *pixelQuintupletsInGPU, *pixelTripletsInGPU, @@ -721,7 +721,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ lst::checkHitspLS checkHitspLS_kernel; auto const checkHitspLSTask(alpaka::createTaskKernel( - checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_->data(), *segmentsInGPU, true)); + checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true)); alpaka::enqueue(queue, checkHitspLSTask); } @@ -734,7 +734,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ lst::crossCleanpLS 
crossCleanpLS_kernel; auto const crossCleanpLSTask(alpaka::createTaskKernel(crossCleanpLS_workDiv, crossCleanpLS_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, *trackCandidatesInGPU, @@ -868,7 +868,7 @@ void lst::Event::createPixelTriplets() { auto const createPixelTripletsInGPUFromMapv2Task( alpaka::createTaskKernel(createPixelTripletsInGPUFromMapv2_workDiv, createPixelTripletsInGPUFromMapv2_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU, *segmentsInGPU, @@ -915,7 +915,7 @@ void lst::Event::createQuintuplets() { auto const createEligibleModulesListForQuintupletsGPUTask( alpaka::createTaskKernel(createEligibleModulesListForQuintupletsGPU_workDiv, createEligibleModulesListForQuintupletsGPU_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU)); @@ -949,7 +949,7 @@ void lst::Event::createQuintuplets() { lst::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; auto const createQuintupletsInGPUv2Task(alpaka::createTaskKernel(createQuintupletsInGPUv2_workDiv, createQuintupletsInGPUv2_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *tripletsInGPU, @@ -968,7 +968,7 @@ void lst::Event::createQuintuplets() { auto const removeDupQuintupletsInGPUAfterBuildTask( alpaka::createTaskKernel(removeDupQuintupletsInGPUAfterBuild_workDiv, removeDupQuintupletsInGPUAfterBuild_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU)); @@ -983,7 +983,7 @@ void lst::Event::createQuintuplets() { auto const addQuintupletRangesToEventExplicitTask( alpaka::createTaskKernel(addQuintupletRangesToEventExplicit_workDiv, addQuintupletRangesToEventExplicit_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU)); @@ -1004,7 +1004,7 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { lst::checkHitspLS checkHitspLS_kernel; auto 
const checkHitspLSTask(alpaka::createTaskKernel( - checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_->data(), *segmentsInGPU, false)); + checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false)); alpaka::enqueue(queue, checkHitspLSTask); alpaka::wait(queue); @@ -1097,7 +1097,7 @@ void lst::Event::createPixelQuintuplets() { auto const createPixelQuintupletsInGPUFromMapv2Task( alpaka::createTaskKernel(createPixelQuintupletsInGPUFromMapv2_workDiv, createPixelQuintupletsInGPUFromMapv2_kernel, - *modulesBuffers_->data(), + *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *tripletsInGPU, @@ -1155,10 +1155,10 @@ void lst::Event::addMiniDoubletsToEventExplicit() { alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_->subdets_buf, nLowerModules_); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_->layers_buf, nLowerModules_); + alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); @@ -1186,10 +1186,10 @@ void lst::Event::addSegmentsToEventExplicit() { alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_->subdets_buf, nLowerModules_); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, 
module_layers_buf, modulesBuffers_->layers_buf, nLowerModules_); + alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); @@ -1213,10 +1213,10 @@ void lst::Event::addQuintupletsToEventExplicit() { alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_->subdets_buf, nModules_); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_->layers_buf, nLowerModules_); + alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); @@ -1244,10 +1244,10 @@ void lst::Event::addTripletsToEventExplicit() { alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_->subdets_buf, nLowerModules_); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers_->layers_buf, nLowerModules_); + alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); unsigned int* nTripletsCPU = alpaka::getPtrNative(nTripletsCPU_buf); @@ -1822,7 +1822,7 @@ lst::ModulesBuffer* lst::Event::getModules(bool isFull) { // The last input here is just a small placeholder for the allocation. 
modulesInCPU = new lst::ModulesBuffer(devHost, nModules_, nPixels_); - modulesInCPU->copyFromSrc(queue, *modulesBuffers_, isFull); + modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); } return modulesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 01abacba7dc74..cbf4302dcc465 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -87,9 +87,9 @@ namespace lst { const uint16_t nLowerModules_; const unsigned int nPixels_; const unsigned int nEndCapMap_; - const std::shared_ptr> modulesBuffers_; - const std::shared_ptr pixelMapping_; - const std::shared_ptr> endcapGeometryBuffers_; + ModulesBuffer const& modulesBuffers_; + std::shared_ptr pixelMapping_; + EndcapGeometryBuffer const& endcapGeometryBuffers_; public: // Constructor used for CMSSW integration. Uses an external queue. From 14b8bd918bddab91e3f0f85f3f06dfdd5ffe6198 Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Fri, 2 Aug 2024 08:36:24 -0700 Subject: [PATCH 02/20] Minor cleanup --- RecoTracker/LSTCore/interface/LSTESData.h | 6 +- RecoTracker/LSTCore/src/LSTESData.cc | 39 ++++++++----- RecoTracker/LSTCore/src/ModuleMethods.h | 21 ++++--- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 63 ++++++++++----------- RecoTracker/LSTCore/src/alpaka/Event.h | 4 +- 5 files changed, 71 insertions(+), 62 deletions(-) diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h index 321c2f371cc32..9f51be48f28b6 100644 --- a/RecoTracker/LSTCore/interface/LSTESData.h +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -49,9 +49,11 @@ namespace cms::alpakatools { template static lst::LSTESData> copyAsync(TQueue& queue, lst::LSTESData const& srcData) { - auto deviceModulesBuffers = lst::ModulesBuffer>(alpaka::getDev(queue), srcData.nModules, srcData.nPixels); + auto deviceModulesBuffers = + lst::ModulesBuffer>(alpaka::getDev(queue), srcData.nModules, srcData.nPixels); 
deviceModulesBuffers.copyFromSrc(queue, srcData.modulesBuffers); - auto deviceEndcapGeometryBuffers = lst::EndcapGeometryBuffer>(alpaka::getDev(queue), srcData.nEndCapMap); + auto deviceEndcapGeometryBuffers = + lst::EndcapGeometryBuffer>(alpaka::getDev(queue), srcData.nEndCapMap); deviceEndcapGeometryBuffers.copyFromSrc(queue, srcData.endcapGeometryBuffers); return lst::LSTESData>(srcData.nModules, diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc index 01210d89a82f3..36c11ea59a711 100644 --- a/RecoTracker/LSTCore/src/LSTESData.cc +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -83,25 +83,36 @@ std::unique_ptr> lst::loadAndFillESHost() MapPLStoLayer pLStoLayer; EndcapGeometry endcapGeometry; TiltedGeometry tiltedGeometry; - auto pixelMapping = std::make_shared(); + PixelMap pixelMapping; ModuleConnectionMap moduleConnectionMap; ::loadMapsHost(pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); - auto endcapGeometryBuffers = EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); - std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapDetId_buf), endcapGeometry.geoMapDetId_buf.data(), endcapGeometry.nEndCapMap); - std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapPhi_buf), endcapGeometry.geoMapPhi_buf.data(), endcapGeometry.nEndCapMap); + auto endcapGeometryBuffers = + EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); + std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapDetId_buf), + endcapGeometry.geoMapDetId_buf.data(), + endcapGeometry.nEndCapMap); + std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapPhi_buf), + endcapGeometry.geoMapPhi_buf.data(), + endcapGeometry.nEndCapMap); auto path = get_absolute_path_after_check_file_exists(trackLooperDir() + "/data/OT800_IT615_pt0.8/sensor_centroids.bin"); auto modulesBuffers = lst::loadModulesFromFile(pLStoLayer, - path.c_str(), - nModules, - nLowerModules, - nPixels, - pixelMapping.get(), 
- endcapGeometry, - tiltedGeometry, - moduleConnectionMap); - return std::make_unique>( - nModules, nLowerModules, nPixels, endcapGeometry.nEndCapMap, std::move(modulesBuffers), std::move(endcapGeometryBuffers), pixelMapping); + path.c_str(), + nModules, + nLowerModules, + nPixels, + pixelMapping, + endcapGeometry, + tiltedGeometry, + moduleConnectionMap); + auto pixelMappingPtr = std::make_shared(std::move(pixelMapping)); + return std::make_unique>(nModules, + nLowerModules, + nPixels, + endcapGeometry.nEndCapMap, + std::move(modulesBuffers), + std::move(endcapGeometryBuffers), + pixelMappingPtr); } diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index f4b74917bec13..a5b16573db57b 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -113,8 +113,7 @@ namespace lst { } }; - inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, - ModuleMetaData const& mmd) { + inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd) { uint16_t* mapIdx = alpaka::getPtrNative(modulesBuf.mapIdx_buf); unsigned int* mapdetId = alpaka::getPtrNative(modulesBuf.mapdetId_buf); @@ -189,14 +188,14 @@ namespace lst { }; inline ModulesBuffer loadModulesFromFile(MapPLStoLayer const& pLStoLayer, - const char* moduleMetaDataFilePath, - uint16_t& nModules, - uint16_t& nLowerModules, - unsigned int& nPixels, - PixelMap* pixelMapping, - const EndcapGeometry& endcapGeometry, - const TiltedGeometry& tiltedGeometry, - const ModuleConnectionMap& moduleConnectionMap) { + const char* moduleMetaDataFilePath, + uint16_t& nModules, + uint16_t& nLowerModules, + unsigned int& nPixels, + PixelMap& pixelMapping, + const EndcapGeometry& endcapGeometry, + const TiltedGeometry& tiltedGeometry, + const ModuleConnectionMap& moduleConnectionMap) { ModuleMetaData mmd; loadCentroidsFromFile(moduleMetaDataFilePath, mmd, nModules); @@ -327,7 +326,7 @@ namespace lst { } } - 
fillPixelMap(modulesBuf, nModules, nPixels, *pixelMapping, pLStoLayer, mmd); + fillPixelMap(modulesBuf, nModules, nPixels, pixelMapping, pLStoLayer, mmd); *host_nModules = nModules; *host_nLowerModules = nLowerModules; diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 6d4a2073d7e7e..318622da2ce42 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -188,18 +188,17 @@ void lst::Event::addHitToEvent(std::vector const& x, WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); hitLoopKernel hit_loop_kernel; - auto const hit_loop_task( - alpaka::createTaskKernel(hit_loop_workdiv, - hit_loop_kernel, - Endcap, - TwoS, - nModules_, - nEndCapMap_, - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), - *modulesBuffers_.data(), - *hitsInGPU, - nHits)); + auto const hit_loop_task(alpaka::createTaskKernel(hit_loop_workdiv, + hit_loop_kernel, + Endcap, + TwoS, + nModules_, + nEndCapMap_, + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), + *modulesBuffers_.data(), + *hitsInGPU, + nHits)); alpaka::enqueue(queue, hit_loop_task); @@ -247,7 +246,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& } unsigned int mdSize = 2 * size; - uint16_t pixelModuleIndex = pixelMapping_->pixelModuleIndex; + uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex; if (mdsInGPU == nullptr) { // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy @@ -820,9 +819,9 @@ void lst::Event::createPixelTriplets() { alpaka::wait(queue); int pixelIndexOffsetPos = - pixelMapping_->connectedPixelsIndex[size_superbins - 1] + pixelMapping_->connectedPixelsSizes[size_superbins - 1]; - int pixelIndexOffsetNeg = 
pixelMapping_->connectedPixelsIndexPos[size_superbins - 1] + - pixelMapping_->connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel // the current selection still leaves a significant fraction of unmatchable pLSs @@ -838,19 +837,19 @@ void lst::Event::createPixelTriplets() { // Used pixel type to select correct size-index arrays if (pixelType == 0) { connectedPixelSize_host[i] = - pixelMapping_->connectedPixelsSizes[superbin]; // number of connected modules to this pixel - auto connectedIdxBase = pixelMapping_->connectedPixelsIndex[superbin]; + pixelMapping_.connectedPixelsSizes[superbin]; // number of connected modules to this pixel + auto connectedIdxBase = pixelMapping_.connectedPixelsIndex[superbin]; connectedPixelIndex_host[i] = connectedIdxBase; // index to get start of connected modules for this superbin in map } else if (pixelType == 1) { connectedPixelSize_host[i] = - pixelMapping_->connectedPixelsSizesPos[superbin]; // number of pixel connected modules - auto connectedIdxBase = pixelMapping_->connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + pixelMapping_.connectedPixelsSizesPos[superbin]; // number of pixel connected modules + auto connectedIdxBase = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; connectedPixelIndex_host[i] = connectedIdxBase; // index to get start of connected pixel modules } else if (pixelType == 2) { connectedPixelSize_host[i] = - pixelMapping_->connectedPixelsSizesNeg[superbin]; // number of pixel connected modules - auto connectedIdxBase = pixelMapping_->connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + 
pixelMapping_.connectedPixelsSizesNeg[superbin]; // number of pixel connected modules + auto connectedIdxBase = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; connectedPixelIndex_host[i] = connectedIdxBase; // index to get start of connected pixel modules } } @@ -1052,9 +1051,9 @@ void lst::Event::createPixelQuintuplets() { alpaka::wait(queue); int pixelIndexOffsetPos = - pixelMapping_->connectedPixelsIndex[size_superbins - 1] + pixelMapping_->connectedPixelsSizes[size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_->connectedPixelsIndexPos[size_superbins - 1] + - pixelMapping_->connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; // Loop over # pLS for (unsigned int i = 0; i < nInnerSegments; i++) { @@ -1068,18 +1067,16 @@ void lst::Event::createPixelQuintuplets() { // Used pixel type to select correct size-index arrays if (pixelType == 0) { connectedPixelSize_host[i] = - pixelMapping_->connectedPixelsSizes[superbin]; //number of connected modules to this pixel - unsigned int connectedIdxBase = pixelMapping_->connectedPixelsIndex[superbin]; + pixelMapping_.connectedPixelsSizes[superbin]; //number of connected modules to this pixel + unsigned int connectedIdxBase = pixelMapping_.connectedPixelsIndex[superbin]; connectedPixelIndex_host[i] = connectedIdxBase; } else if (pixelType == 1) { - connectedPixelSize_host[i] = - pixelMapping_->connectedPixelsSizesPos[superbin]; //number of pixel connected modules - unsigned int connectedIdxBase = pixelMapping_->connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin]; //number of pixel connected modules + unsigned int 
connectedIdxBase = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; connectedPixelIndex_host[i] = connectedIdxBase; } else if (pixelType == 2) { - connectedPixelSize_host[i] = - pixelMapping_->connectedPixelsSizesNeg[superbin]; //number of pixel connected modules - unsigned int connectedIdxBase = pixelMapping_->connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin]; //number of pixel connected modules + unsigned int connectedIdxBase = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; connectedPixelIndex_host[i] = connectedIdxBase; } } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index cbf4302dcc465..f1fa3a7d23347 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -88,7 +88,7 @@ namespace lst { const unsigned int nPixels_; const unsigned int nEndCapMap_; ModulesBuffer const& modulesBuffers_; - std::shared_ptr pixelMapping_; + PixelMap const& pixelMapping_; EndcapGeometryBuffer const& endcapGeometryBuffers_; public: @@ -103,7 +103,7 @@ namespace lst { nPixels_(deviceESData->nPixels), nEndCapMap_(deviceESData->nEndCapMap), modulesBuffers_(deviceESData->modulesBuffers), - pixelMapping_(deviceESData->pixelMapping), + pixelMapping_(*deviceESData->pixelMapping), endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { init(verbose); } From 8f720ad0a855687648cad43780c15adb542e2b9e Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Mon, 5 Aug 2024 07:57:46 -0700 Subject: [PATCH 03/20] Fixed memcpy --- RecoTracker/LSTCore/src/LSTESData.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc index 36c11ea59a711..9079d0d229216 100644 --- a/RecoTracker/LSTCore/src/LSTESData.cc +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -91,10 +91,10 @@ std::unique_ptr> 
lst::loadAndFillESHost() EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapDetId_buf), endcapGeometry.geoMapDetId_buf.data(), - endcapGeometry.nEndCapMap); + endcapGeometry.nEndCapMap * sizeof(unsigned int)); std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapPhi_buf), endcapGeometry.geoMapPhi_buf.data(), - endcapGeometry.nEndCapMap); + endcapGeometry.nEndCapMap * sizeof(float)); auto path = get_absolute_path_after_check_file_exists(trackLooperDir() + "/data/OT800_IT615_pt0.8/sensor_centroids.bin"); From 85576445ee6b89c8675978d738d170895ce1c25a Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Fri, 2 Aug 2024 08:16:42 -0700 Subject: [PATCH 04/20] remove spurious semicolons at the end of method implementations --- RecoTracker/LSTCore/interface/Module.h | 6 +-- RecoTracker/LSTCore/src/ModuleMethods.h | 12 ++--- RecoTracker/LSTCore/src/alpaka/Hit.h | 14 +++--- RecoTracker/LSTCore/src/alpaka/Kernels.h | 14 +++--- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 16 +++---- .../LSTCore/src/alpaka/PixelQuintuplet.h | 20 ++++---- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 6 +-- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 46 +++++++++---------- RecoTracker/LSTCore/src/alpaka/Segment.h | 18 ++++---- .../LSTCore/src/alpaka/TrackCandidate.h | 6 +-- RecoTracker/LSTCore/src/alpaka/Triplet.h | 16 +++---- 11 files changed, 87 insertions(+), 87 deletions(-) diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h index d45415f800a4f..78396c195cd8f 100644 --- a/RecoTracker/LSTCore/interface/Module.h +++ b/RecoTracker/LSTCore/interface/Module.h @@ -72,15 +72,15 @@ namespace lst { } else { return false; } - }; + } static bool parseIsLower(bool isInvertedx, unsigned int detId) { return (isInvertedx) ? 
!(detId & 1) : (detId & 1); - }; + } static unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? detId + 1 : detId - 1); - }; + } template void setData(TBuff const& buf) { diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index a5b16573db57b..54514cccf2b54 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -94,7 +94,7 @@ namespace lst { for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); } - }; + } inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd, @@ -111,7 +111,7 @@ namespace lst { moduleMap[index * max_connected_modules + i] = mmd.detIdToIndex.at(connectedModules[i]); } } - }; + } inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd) { uint16_t* mapIdx = alpaka::getPtrNative(modulesBuf.mapIdx_buf); @@ -125,7 +125,7 @@ namespace lst { mapdetId[counter] = detId; counter++; } - }; + } inline void setDerivedQuantities(unsigned int detId, unsigned short& layer, @@ -148,7 +148,7 @@ namespace lst { r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); - }; + } inline void loadCentroidsFromFile(const char* filePath, ModuleMetaData& mmd, uint16_t& nModules) { std::ifstream ifile(filePath, std::ios::binary); @@ -185,7 +185,7 @@ namespace lst { mmd.detIdToIndex[1] = counter; //pixel module is the last module in the module list counter++; nModules = counter; - }; + } inline ModulesBuffer loadModulesFromFile(MapPLStoLayer const& pLStoLayer, const char* moduleMetaDataFilePath, @@ -335,6 +335,6 @@ namespace lst { fillMapArraysExplicit(modulesBuf, mmd); return modulesBuf; - }; + } } // 
namespace lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index c14ac26124e6d..c0eb481c73228 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -113,7 +113,7 @@ namespace lst { float rt = alpaka::math::sqrt(acc, x * x + y * y); float eta = ((z > 0) - (z < 0)) * alpaka::math::acosh(acc, r3 / rt); return eta; - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi_mpi_pi(TAcc const& acc, float x) { @@ -123,24 +123,24 @@ namespace lst { constexpr float o2pi = 1.f / (2.f * float(M_PI)); float n = alpaka::math::round(acc, x * o2pi); return x - n * float(2.f * float(M_PI)); - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi(TAcc const& acc, float x, float y) { return phi_mpi_pi(acc, float(M_PI) + alpaka::math::atan2(acc, -y, -x)); - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhi(TAcc const& acc, float x1, float y1, float x2, float y2) { float phi1 = phi(acc, x1, y1); float phi2 = phi(acc, x2, y2); return phi_mpi_pi(acc, (phi2 - phi1)); - }; + } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhiChange(TAcc const& acc, float x1, float y1, float x2, float y2) { return deltaPhi(acc, x1, y1, x2 - x1, y2 - y1); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float calculate_dPhi(float phi1, float phi2) { // Calculate dPhi @@ -154,7 +154,7 @@ namespace lst { } return dPhi; - }; + } ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(const unsigned int* data, // Array that we are searching over unsigned int search_val, // Value we want to find in data array @@ -175,7 +175,7 @@ namespace lst { } // Couldn't find search value in array. 
return -1; - }; + } struct moduleRangesKernel { template diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 8e3fa46c3ab6f..496a3f2ce0fb2 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -18,23 +18,23 @@ namespace lst { unsigned int quintupletIndex, bool secondpass = false) { quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(lst::PixelTriplets& pixelTripletsInGPU, unsigned int pixelTripletIndex) { pixelTripletsInGPU.isDup[pixelTripletIndex] = true; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(lst::PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelQuintupletIndex) { pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(lst::Segments& segmentsInGPU, unsigned int pixelSegmentArrayIndex, bool secondpass = false) { segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, @@ -61,7 +61,7 @@ namespace lst { } } return nMatched; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, unsigned int jx, @@ -88,7 +88,7 @@ namespace lst { } } return nMatched; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, unsigned int jx, @@ -140,7 +140,7 @@ namespace lst { matched[0] = npMatched; matched[1] = nMatched; - }; + } struct removeDupQuintupletsInGPUAfterBuild { template diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 86a22d943c33f..bdbd366bba338 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -260,7 +260,7 @@ namespace lst { mdsInGPU.outerHighEdgeY[idx] = hitsInGPU.highEdgeYs[outerHitIndex]; mdsInGPU.outerLowEdgeX[idx] = 
hitsInGPU.lowEdgeXs[outerHitIndex]; mdsInGPU.outerLowEdgeY[idx] = hitsInGPU.lowEdgeYs[outerHitIndex]; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing @@ -280,7 +280,7 @@ namespace lst { return false; } else return false; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(struct lst::Modules const& modulesInGPU, uint16_t moduleIndex) { float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; @@ -331,7 +331,7 @@ namespace lst { } return moduleSeparation; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( @@ -390,7 +390,7 @@ namespace lst { else { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); } - }; + } template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, @@ -556,7 +556,7 @@ namespace lst { shiftedCoords[0] = xn; shiftedCoords[1] = yn; shiftedCoords[2] = zn; - }; + } template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, @@ -628,7 +628,7 @@ namespace lst { zUpper, rtUpper); } - }; + } template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, @@ -755,7 +755,7 @@ namespace lst { } return alpaka::math::abs(acc, dPhiChange) < miniCut; - }; + } template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, @@ -866,7 +866,7 @@ namespace lst { noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); return alpaka::math::abs(acc, dPhiChange) < miniCut; - }; + } struct createMiniDoubletsInGPUv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index ee172f9e05f6e..fcdcd4d7c78bb 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -201,7 +201,7 @@ namespace lst { 
pixelQuintupletsInGPU.rzChiSquared[pixelQuintupletIndex] = rzChiSquared; pixelQuintupletsInGPU.rPhiChiSquared[pixelQuintupletIndex] = rPhiChiSquared; pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(lst::Modules const& modulesInGPU, uint16_t lowerModuleIndex1, @@ -291,7 +291,7 @@ namespace lst { } } return true; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(lst::Modules const& modulesInGPU, uint16_t lowerModuleIndex1, @@ -381,7 +381,7 @@ namespace lst { } } return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT5(TAcc const& acc, @@ -427,7 +427,7 @@ namespace lst { (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma2); } return chiSquared; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, @@ -513,7 +513,7 @@ namespace lst { } #endif } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, @@ -536,7 +536,7 @@ namespace lst { chiSquared = computeChiSquaredpT5(acc, 5, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); return chiSquared; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquaredInwards( float g, float f, float r, float* xPix, float* yPix) { @@ -551,7 +551,7 @@ namespace lst { } chiSquared *= 0.5f; return chiSquared; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(lst::Modules const& modulesInGPU, uint16_t lowerModuleIndex1, @@ -641,7 +641,7 @@ namespace lst { } } return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, @@ -787,7 +787,7 @@ namespace lst { centerY = (centerY + T5CenterY) / 2; return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, @@ -831,7 +831,7 @@ namespace lst { RMSE = alpaka::math::sqrt(acc, 0.2f * 
RMSE); // Divided by the degree of freedom 5. return RMSE; - }; + } struct createPixelQuintupletsInGPUFromMapv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 3b6faffbce426..4d6f88a8336be 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -1167,7 +1167,7 @@ namespace lst { //2nd update pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, @@ -1425,7 +1425,7 @@ namespace lst { (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); float dBeta = betaIn - betaOut; return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, @@ -1689,7 +1689,7 @@ namespace lst { (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); float dBeta = betaIn - betaOut; return dBeta * dBeta <= dBetaCut2; - }; + } } // namespace lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 1165d33f6da5e..ff6e6ea8380b3 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -148,7 +148,7 @@ namespace lst { float secondMin, float secondMax) { return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(lst::Triplets const& tripletsInGPU, lst::Quintuplets& quintupletsInGPU, @@ -230,7 +230,7 @@ namespace lst { quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared; quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared; quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared; - }; + } 
//90% constraint ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(lst::Modules const& modulesInGPU, @@ -313,7 +313,7 @@ namespace lst { } return true; - }; + } //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt template @@ -749,7 +749,7 @@ namespace lst { } } return true; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(lst::Triplets const& tripletsInGPU, @@ -764,7 +764,7 @@ namespace lst { segmentsInGPU.mdIndices[2 * outerInnerSegmentIndex]; //outer triplet inner segment inner MD index return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeErrorInRadius(TAcc const& acc, @@ -796,7 +796,7 @@ namespace lst { } } } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE12378(TAcc const& acc, @@ -820,7 +820,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } /*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ template @@ -845,7 +845,7 @@ namespace lst { bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBE(TAcc const& acc, @@ -869,7 +869,7 @@ namespace lst { bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE23478(TAcc const& acc, @@ -893,7 +893,7 @@ 
namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE34578(TAcc const& acc, @@ -917,7 +917,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBEEE(TAcc const& acc, @@ -947,7 +947,7 @@ namespace lst { innerInvRadiusMax, alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBEEEE(TAcc const& acc, @@ -978,7 +978,7 @@ namespace lst { alpaka::math::max(acc, innerInvRadiusMax, 1.0 / innerRadiusMin2S), alpaka::math::min(acc, bridgeInvRadiusMin, 1.0 / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0 / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiEEEEE(TAcc const& acc, @@ -1009,7 +1009,7 @@ namespace lst { alpaka::math::max(acc, innerInvRadiusMax, 1.0 / innerRadiusMin2S), alpaka::math::min(acc, bridgeInvRadiusMin, 1.0 / bridgeRadiusMax2S), alpaka::math::max(acc, bridgeInvRadiusMax, 1.0 / bridgeRadiusMin2S)); - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, @@ -1096,7 +1096,7 @@ namespace lst { #endif } } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusUsingRegression(TAcc const& acc, @@ -1194,7 +1194,7 @@ namespace lst { (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / sigmas2[i]; } return radius; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquared(TAcc const& acc, @@ -1240,7 +1240,7 @@ namespace lst { (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; } 
return chiSquared; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationsT5(TAcc const& acc, @@ -1346,7 +1346,7 @@ namespace lst { //2nd update pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, @@ -1598,7 +1598,7 @@ namespace lst { float dBeta = betaIn - betaOut; return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, @@ -1848,7 +1848,7 @@ namespace lst { float dBeta = betaIn - betaOut; //Cut #7: Cut on dBet return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, @@ -2077,7 +2077,7 @@ namespace lst { float dBeta = betaIn - betaOut; //Cut #7: Cut on dBeta return dBeta * dBeta <= dBetaCut2; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, @@ -2182,7 +2182,7 @@ namespace lst { } return false; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, @@ -2533,7 +2533,7 @@ namespace lst { regressionF, regressionRadius); return true; - }; + } struct createQuintupletsInGPUv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6e79bacfa4902..3468a40acc3c0 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -190,7 +190,7 @@ namespace lst { return (subdet == Barrel) && (((side != Center) && (layer == 3)) || ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(short subdet, short layer, short side, short rod) { // The "tighter" tilted modules are the subset of tilted modules 
that have smaller spacing @@ -199,7 +199,7 @@ namespace lst { return (subdet == Barrel) && (((side != Center) && (layer == 3)) || ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; @@ -229,7 +229,7 @@ namespace lst { } return moduleSeparation; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(lst::Modules const& modulesInGPU, unsigned int moduleIndex) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; @@ -261,7 +261,7 @@ namespace lst { } return moduleSeparation; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, @@ -356,7 +356,7 @@ namespace lst { //Inner to outer dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(lst::Segments& segmentsInGPU, unsigned int lowerMDIndex, @@ -448,7 +448,7 @@ namespace lst { segmentsInGPU.circleCenterX[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; segmentsInGPU.circleCenterY[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; segmentsInGPU.circleRadius[pixelSegmentArrayIndex] = circleRadius; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, @@ -538,7 +538,7 @@ namespace lst { if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, @@ -654,7 +654,7 @@ namespace lst { if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return 
false; return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, @@ -700,7 +700,7 @@ namespace lst { dPhiChangeMin, dPhiChangeMax); } - }; + } struct createSegmentsInGPUv2 { template diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index ede4dd9471e8e..dbf5cf24f6d55 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -126,7 +126,7 @@ namespace lst { trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 1] = hitIndices.z; trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 2] = hitIndices.y; trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(lst::TrackCandidates& trackCandidatesInGPU, short trackCandidateType, @@ -163,7 +163,7 @@ namespace lst { trackCandidatesInGPU.centerX[trackCandidateIndex] = __F2H(centerX); trackCandidatesInGPU.centerY[trackCandidateIndex] = __F2H(centerY); trackCandidatesInGPU.radius[trackCandidateIndex] = __F2H(radius); - }; + } ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, unsigned int jx, @@ -203,7 +203,7 @@ namespace lst { npMatched++; } return npMatched; - }; + } struct crossCleanpT3 { template diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index f5a216724c1da..9f3521e712ed6 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -202,7 +202,7 @@ namespace lst { tripletsInGPU.rtOut[tripletIndex] = rtOut; tripletsInGPU.betaInCut[tripletIndex] = betaInCut; #endif - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, @@ -262,7 +262,7 @@ namespace lst { } else { return alpaka::math::abs(acc, 
residual) < 5; } - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, @@ -361,7 +361,7 @@ namespace lst { //Cut #3: first beta cut return alpaka::math::abs(acc, betaIn) < betaInCut; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, @@ -482,7 +482,7 @@ namespace lst { //Cut #4: first beta cut return alpaka::math::abs(acc, betaInRHmin) < betaInCut; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, @@ -605,7 +605,7 @@ namespace lst { //Cut #4: first beta cut return alpaka::math::abs(acc, betaInRHmin) < betaInCut; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, @@ -707,7 +707,7 @@ namespace lst { betaInCut); } return false; // failsafe - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusFromThreeAnchorHits( @@ -740,7 +740,7 @@ namespace lst { radius = alpaka::math::sqrt(acc, g * g + f * f - c); return radius; - }; + } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, @@ -806,7 +806,7 @@ namespace lst { circleRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, circleCenterX, circleCenterY); return true; - }; + } struct createTripletsInGPUv2 { template From e8cc7a8789a6345818a9a400bd56ee10a5d39817 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Fri, 2 Aug 2024 16:20:25 -0700 Subject: [PATCH 05/20] cleanup unnecessary alpaka::wait --- RecoTracker/LSTCore/interface/Module.h | 1 - RecoTracker/LSTCore/src/alpaka/Hit.h | 1 - RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 1 - RecoTracker/LSTCore/src/alpaka/ObjectRanges.h | 1 - RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h | 1 - RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 1 - RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 1 - RecoTracker/LSTCore/src/alpaka/Segment.h | 1 - RecoTracker/LSTCore/src/alpaka/TrackCandidate.h | 1 - 9 files changed, 9 deletions(-) diff 
--git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h index 78396c195cd8f..eca086b91850f 100644 --- a/RecoTracker/LSTCore/interface/Module.h +++ b/RecoTracker/LSTCore/interface/Module.h @@ -212,7 +212,6 @@ namespace lst { alpaka::memcpy(queue, lstLayers_buf, src.lstLayers_buf); alpaka::memcpy(queue, connectedPixels_buf, src.connectedPixels_buf); } - alpaka::wait(queue); } template diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index c0eb481c73228..7f3412ce4694a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -100,7 +100,6 @@ namespace lst { alpaka::memset(queue, hitRangesUpper_buf, 0xff); alpaka::memset(queue, hitRangesnLower_buf, 0xff); alpaka::memset(queue, hitRangesnUpper_buf, 0xff); - alpaka::wait(queue); } inline Hits const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index bdbd366bba338..bda334b31afc1 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -181,7 +181,6 @@ namespace lst { outerLowEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)) { alpaka::memset(queue, nMDs_buf, 0u); alpaka::memset(queue, totOccupancyMDs_buf, 0u); - alpaka::wait(queue); } inline MiniDoublets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h index 1e1ccf8df12bc..09aac58bc8eb4 100644 --- a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -143,7 +143,6 @@ namespace lst { alpaka::memset(queue, trackCandidateRanges_buf, 0xff); alpaka::memset(queue, quintupletRanges_buf, 0xff); alpaka::memset(queue, quintupletModuleIndices_buf, 0xff); - alpaka::wait(queue); data_.setData(*this); } diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h 
b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index fcdcd4d7c78bb..2c0b143a6d913 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -100,7 +100,6 @@ namespace lst { rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)) { alpaka::memset(queue, nPixelQuintuplets_buf, 0u); alpaka::memset(queue, totOccupancyPixelQuintuplets_buf, 0u); - alpaka::wait(queue); } inline PixelQuintuplets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 4d6f88a8336be..15e4456c21fc6 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -123,7 +123,6 @@ namespace lst { alpaka::memset(queue, nPixelTriplets_buf, 0u); alpaka::memset(queue, totOccupancyPixelTriplets_buf, 0u); alpaka::memset(queue, partOfPT5_buf, false); - alpaka::wait(queue); } inline PixelTriplets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index ff6e6ea8380b3..3b700dbb94793 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -136,7 +136,6 @@ namespace lst { alpaka::memset(queue, isDup_buf, 0u); alpaka::memset(queue, TightCutFlag_buf, false); alpaka::memset(queue, partOfPT5_buf, false); - alpaka::wait(queue); } inline Quintuplets const* data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 3468a40acc3c0..76436778802b1 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -170,7 +170,6 @@ namespace lst { alpaka::memset(queue, totOccupancySegments_buf, 0u); alpaka::memset(queue, partOfPT5_buf, false); alpaka::memset(queue, pLSHitsIdxs_buf, 0u); - alpaka::wait(queue); } inline Segments const* 
data() const { return &data_; } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index dbf5cf24f6d55..835647c65e4bd 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -102,7 +102,6 @@ namespace lst { alpaka::memset(queue, lowerModuleIndices_buf, 0u); alpaka::memset(queue, hitIndices_buf, 0u); alpaka::memset(queue, pixelSeedIndex_buf, 0); - alpaka::wait(queue); } inline TrackCandidates const* data() const { return &data_; } From 73431560c541ac40e180f4b179182bb1d336860c Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 05:31:06 -0700 Subject: [PATCH 06/20] cleanup unnecessary alpaka::wait; add comments justifying other alpaka::wait calls; switch to cms::alpakatools::make_host_buffer for a few local buffers --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 433 +++++++++----------- RecoTracker/LSTCore/src/alpaka/Event.h | 2 +- 2 files changed, 188 insertions(+), 247 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 318622da2ce42..05b9faac480e7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -1,3 +1,5 @@ +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + #include "Event.h" using namespace ALPAKA_ACCELERATOR_NAMESPACE; @@ -181,7 +183,7 @@ void lst::Event::addHitToEvent(std::vector const& x, alpaka::memcpy(queue, hitsBuffers->detid_buf, detId, nHits); alpaka::memcpy(queue, hitsBuffers->idxs_buf, idxInNtuple, nHits); alpaka::memcpy(queue, hitsBuffers->nHits_buf, nHits_view); - alpaka::wait(queue); + alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; Vec3D const blocksPerGrid1{1, 1, max_blocks}; @@ -253,12 +255,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& auto 
dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); - // Create a source view for the value to be set - int value = n_max_pixel_md_per_modules; - auto src_view_value = alpaka::createView(devHost, &value, (Idx)1u); + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, src_view_value); - alpaka::wait(queue); + alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; Vec3D const blocksPerGridCreateMD{1, 1, 1}; @@ -270,22 +271,19 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); - alpaka::wait(queue); - unsigned int nTotalMDs; - auto nTotalMDs_view = alpaka::createView(devHost, &nTotalMDs, (Idx)1u); + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); + alpaka::wait(queue); // wait to get the data before manipulation - alpaka::memcpy(queue, nTotalMDs_view, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); - - nTotalMDs += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); mdsInGPU = new lst::MiniDoublets(); miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); - alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_view); - alpaka::wait(queue); + alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); } if (segmentsInGPU == 
nullptr) { // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. @@ -304,22 +302,20 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& *mdsInGPU)); alpaka::enqueue(queue, createSegmentArrayRangesTask); - alpaka::wait(queue); - auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments, (Idx)1u); + auto nTotalSegments_view = alpaka::createView(devHost, &nTotalSegments_, (Idx)1u); alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments += n_max_pixel_segments_per_module; + nTotalSegments_ += n_max_pixel_segments_per_module; segmentsInGPU = new lst::Segments(); - segmentsBuffers = - new lst::SegmentsBuffer(nTotalSegments, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsBuffers = new lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); - alpaka::wait(queue); } auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); @@ -366,7 +362,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::createSubView(miniDoubletsBuffers->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); alpaka::memcpy(queue, dst_view_totOccupancyMDs, src_view_mdSize); - alpaka::wait(queue); + alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; Vec3D const blocksPerGrid{1, 1, max_blocks}; @@ -389,7 +385,6 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& size)); alpaka::enqueue(queue, addPixelSegmentToEvent_task); - alpaka::wait(queue); } void lst::Event::createMiniDoublets() { @@ -397,12 +392,11 @@ void 
lst::Event::createMiniDoublets() { auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); - // Create a source view for the value to be set - int value = n_max_pixel_md_per_modules; - auto src_view_value = alpaka::createView(devHost, &value, (Idx)1u); + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; - alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, src_view_value); - alpaka::wait(queue); + alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; Vec3D const blocksPerGridCreateMD{1, 1, 1}; @@ -414,16 +408,13 @@ void lst::Event::createMiniDoublets() { createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); alpaka::enqueue(queue, createMDArrayRangesGPUTask); - alpaka::wait(queue); - auto nTotalMDs_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTotalMDs_buf, rangesBuffers->device_nTotalMDs_buf); - alpaka::wait(queue); + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); + alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); + alpaka::wait(queue); // wait to get the data before manipulation - unsigned int nTotalMDs = *alpaka::getPtrNative(nTotalMDs_buf); - - nTotalMDs += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { mdsInGPU = new lst::MiniDoublets(); @@ -461,7 +452,6 @@ void lst::Event::createMiniDoublets() { *hitsInGPU)); alpaka::enqueue(queue, addMiniDoubletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addMiniDoubletsToEventExplicit(); @@ -471,8 +461,8 @@ void lst::Event::createMiniDoublets() { 
void lst::Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { segmentsInGPU = new lst::Segments(); - segmentsBuffers = - new lst::SegmentsBuffer(nTotalSegments, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsBuffers = new lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } @@ -505,7 +495,6 @@ void lst::Event::createSegmentsWithModuleMap() { *rangesInGPU)); alpaka::enqueue(queue, addSegmentRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addSegmentsToEventExplicit(); @@ -527,46 +516,40 @@ void lst::Event::createTriplets() { *segmentsInGPU)); alpaka::enqueue(queue, createTripletArrayRangesTask); - alpaka::wait(queue); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? - auto maxTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - alpaka::memcpy(queue, maxTriplets_buf, rangesBuffers->device_nTotalTrips_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); + alpaka::wait(queue); // wait to get the value before using it tripletsInGPU = new lst::Triplets(); - tripletsBuffers = - new lst::TripletsBuffer(*alpaka::getPtrNative(maxTriplets_buf), nLowerModules_, devAcc, queue); + tripletsBuffers = new lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); - alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); } uint16_t nonZeroModules = 0; unsigned int max_InnerSeg = 0; - // Allocate host index - auto index_buf = allocBufWrapper(devHost, nLowerModules_, queue); - uint16_t* index = alpaka::getPtrNative(index_buf); + // Allocate and copy 
nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + alpaka::memcpy(queue, nSegments_buf_h, segmentsBuffers->nSegments_buf, nLowerModules_); - // Allocate device index - auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + // ... same for module_nConnectedModules + // FIXME: replace by ES host data + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + alpaka::memcpy(queue, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - // Allocate and copy nSegments from device to host - auto nSegments_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, nSegments_buf, segmentsBuffers->nSegments_buf, nLowerModules_); - alpaka::wait(queue); + alpaka::wait(queue); // wait for nSegments and module_nConnectedModules before using - unsigned int* nSegments = alpaka::getPtrNative(nSegments_buf); + auto const* nSegments = nSegments_buf_h.data(); + auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); - // Allocate and copy module_nConnectedModules from device to host - auto module_nConnectedModules_buf = allocBufWrapper(devHost, nLowerModules_, queue); - alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffers_.nConnectedModules_buf, nLowerModules_); - alpaka::wait(queue); - - uint16_t* module_nConnectedModules = alpaka::getPtrNative(module_nConnectedModules_buf); + // Allocate host index and fill it directly + auto index_buf_h = cms::alpakatools::make_host_buffer(queue, nLowerModules_); + auto* index = index_buf_h.data(); for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex]; @@ -578,9 +561,9 @@ void lst::Event::createTriplets() { max_InnerSeg = std::max(max_InnerSeg, nInnerSegments); 
} - // Copy index from host to device - alpaka::memcpy(queue, index_gpu_buf, index_buf, nonZeroModules); - alpaka::wait(queue); + // Allocate and copy to device index + auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; @@ -614,7 +597,6 @@ void lst::Event::createTriplets() { *rangesInGPU)); alpaka::enqueue(queue, addTripletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addTripletsToEventExplicit(); @@ -629,12 +611,6 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - // Pull nEligibleT5Modules from the device. - auto nEligibleModules_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nEligibleModules_buf, rangesBuffers->nEligibleT5Modules_buf); - alpaka::wait(queue); - uint16_t nEligibleModules = *alpaka::getPtrNative(nEligibleModules_buf); - Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const crossCleanpT3_workDiv = @@ -667,6 +643,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::enqueue(queue, addpT3asTrackCandidatesInGPUTask); + // Pull nEligibleT5Modules from the device. 
+ auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nEligibleModules_buf_h, rangesBuffers->nEligibleT5Modules_buf); + alpaka::wait(queue); // wait to get the value before using + auto const nEligibleModules = *nEligibleModules_buf_h.data(); + Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = @@ -768,12 +750,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); alpaka::memcpy(queue, nTrackCanT5Host_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the values before using them - int nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); - int nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); - int nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); + auto nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); + auto nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); + auto nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); + auto nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { printf( @@ -792,31 +774,30 @@ void lst::Event::createPixelTriplets() { pixelTripletsInGPU->setData(*pixelTripletsBuffers); } + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = 
allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + + alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); + alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + unsigned int nInnerSegments; auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); + // Create a sub-view for the device buffer auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); - - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - int* superbins = alpaka::getPtrNative(superbins_buf); - int8_t* pixelTypes = alpaka::getPtrNative(pixelTypes_buf); unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); - alpaka::wait(queue); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -856,7 +837,6 @@ void lst::Event::createPixelTriplets() { 
alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); - alpaka::wait(queue); Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; @@ -878,13 +858,12 @@ void lst::Event::createPixelTriplets() { nInnerSegments)); alpaka::enqueue(queue, createPixelTripletsInGPUFromMapv2Task); - alpaka::wait(queue); #ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before using it std::cout << "number of pixel triplets = " << *alpaka::getPtrNative(nPixelTriplets_buf) << std::endl; #endif @@ -901,7 +880,6 @@ void lst::Event::createPixelTriplets() { removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, *pixelTripletsInGPU)); alpaka::enqueue(queue, removeDupPixelTripletsInGPUFromMapTask); - alpaka::wait(queue); } void lst::Event::createQuintuplets() { @@ -919,17 +897,16 @@ void lst::Event::createQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, createEligibleModulesListForQuintupletsGPUTask); - alpaka::wait(queue); auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait for the values before using them - uint16_t nEligibleT5Modules = *alpaka::getPtrNative(nEligibleT5Modules_buf); - unsigned int nTotalQuintuplets = *alpaka::getPtrNative(nTotalQuintuplets_buf); + auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); + auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); 
if (quintupletsInGPU == nullptr) { quintupletsInGPU = new lst::Quintuplets(); @@ -937,7 +914,6 @@ void lst::Event::createQuintuplets() { quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); - alpaka::wait(queue); } Vec3D const threadsPerBlockQuints{1, 8, 32}; @@ -987,7 +963,6 @@ void lst::Event::createQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, addQuintupletRangesToEventExplicitTask); - alpaka::wait(queue); if (addObjects) { addQuintupletsToEventExplicit(); @@ -1006,7 +981,6 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false)); alpaka::enqueue(queue, checkHitspLSTask); - alpaka::wait(queue); } } @@ -1023,6 +997,14 @@ void lst::Event::createPixelQuintuplets() { trackCandidatesInGPU->setData(*trackCandidatesBuffers); } + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + + alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); + alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + unsigned int nInnerSegments; auto nInnerSegments_src_view = alpaka::createView(devHost, &nInnerSegments, (size_t)1u); @@ -1030,25 +1012,15 @@ void lst::Event::createPixelQuintuplets() { auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers->nSegments_buf, (Idx)1u, (Idx)nLowerModules_); alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); - alpaka::wait(queue); - - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - - alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); - 
alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - int* superbins = alpaka::getPtrNative(superbins_buf); - int8_t* pixelTypes = alpaka::getPtrNative(pixelTypes_buf); - unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); - unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); - alpaka::wait(queue); + auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -1083,7 +1055,6 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); - alpaka::wait(queue); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; @@ -1135,13 +1106,12 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU)); alpaka::enqueue(queue, addpT5asTrackCandidateInGPUTask); - alpaka::wait(queue); #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get the value before using it std::cout << "number of pixel 
quintuplets = " << *alpaka::getPtrNative(nPixelQuintuplets_buf) << std::endl; #endif @@ -1151,6 +1121,7 @@ void lst::Event::addMiniDoubletsToEventExplicit() { auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); @@ -1160,12 +1131,12 @@ void lst::Event::addMiniDoubletsToEventExplicit() { auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nMDsCPU = alpaka::getPtrNative(nMDsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); - int* module_hitRanges = alpaka::getPtrNative(module_hitRanges_buf); + auto const* nMDsCPU = nMDsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_hitRanges = module_hitRanges_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) { @@ -1182,17 +1153,18 @@ void lst::Event::addSegmentsToEventExplicit() { auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, 
modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nSegmentsCPU = alpaka::getPtrNative(nSegmentsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); + auto const* nSegmentsCPU = nSegmentsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nSegmentsCPU[i] == 0)) { @@ -1209,6 +1181,7 @@ void lst::Event::addQuintupletsToEventExplicit() { auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); @@ -1218,12 +1191,12 @@ void lst::Event::addQuintupletsToEventExplicit() { auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait for inputs before using them - unsigned int* nQuintupletsCPU = alpaka::getPtrNative(nQuintupletsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); - int* module_quintupletModuleIndices = alpaka::getPtrNative(module_quintupletModuleIndices_buf); + auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data(); for (uint16_t i = 0; i < nLowerModules_; i++) { if 
(!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { @@ -1240,16 +1213,18 @@ void lst::Event::addTripletsToEventExplicit() { auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); + // FIXME: replace by ES host data auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - alpaka::wait(queue); - unsigned int* nTripletsCPU = alpaka::getPtrNative(nTripletsCPU_buf); - short* module_subdets = alpaka::getPtrNative(module_subdets_buf); - short* module_layers = alpaka::getPtrNative(module_layers_buf); + alpaka::wait(queue); // wait for inputs before using them + + auto const* nTripletsCPU = nTripletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); for (uint16_t i = 0; i < nLowerModules_; i++) { if (nTripletsCPU[i] != 0) { @@ -1371,25 +1346,19 @@ unsigned int lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int la } int lst::Event::getNumberOfPixelTriplets() { - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); - int nPixelTriplets = *alpaka::getPtrNative(nPixelTriplets_buf); - - return nPixelTriplets; + return *nPixelTriplets_buf_h.data(); } int lst::Event::getNumberOfPixelQuintuplets() { - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nPixelQuintuplets_buf, 
pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nPixelQuintuplets = *alpaka::getPtrNative(nPixelQuintuplets_buf); + alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - return nPixelQuintuplets; + return *nPixelQuintuplets_buf_h.data(); } unsigned int lst::Event::getNumberOfQuintuplets() { @@ -1420,110 +1389,90 @@ unsigned int lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int } int lst::Event::getNumberOfTrackCandidates() { - auto nTrackCandidates_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCandidates_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); - - int nTrackCandidates = *alpaka::getPtrNative(nTrackCandidates_buf); + alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); - return nTrackCandidates; + return *nTrackCandidates_buf_h.data(); } int lst::Event::getNumberOfPT5TrackCandidates() { - auto nTrackCandidatesPT5_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - alpaka::memcpy(queue, nTrackCandidatesPT5_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); + alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::wait(queue); - int nTrackCandidatesPT5 = *alpaka::getPtrNative(nTrackCandidatesPT5_buf); - - return nTrackCandidatesPT5; + return *nTrackCandidatesPT5_buf_h.data(); } int lst::Event::getNumberOfPT3TrackCandidates() { - auto nTrackCandidatesPT3_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesPT3_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); - alpaka::wait(queue); + auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int 
nTrackCandidatesPT3 = *alpaka::getPtrNative(nTrackCandidatesPT3_buf); + alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); - return nTrackCandidatesPT3; + return *nTrackCandidatesPT3_buf_h.data(); } int lst::Event::getNumberOfPLSTrackCandidates() { - auto nTrackCandidatesPLS_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidatesPLS_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); - alpaka::wait(queue); + auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - unsigned int nTrackCandidatesPLS = *alpaka::getPtrNative(nTrackCandidatesPLS_buf); + alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); - return nTrackCandidatesPLS; + return *nTrackCandidatesPLS_buf_h.data(); } int lst::Event::getNumberOfPixelTrackCandidates() { - auto nTrackCandidates_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCandidatesT5_buf = allocBufWrapper(devHost, 1, queue); - - alpaka::memcpy(queue, nTrackCandidates_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::memcpy(queue, nTrackCandidatesT5_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidates = *alpaka::getPtrNative(nTrackCandidates_buf); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCandidatesT5_buf); + alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); + alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - return nTrackCandidates - nTrackCandidatesT5; + return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } int lst::Event::getNumberOfT5TrackCandidates() { - auto nTrackCandidatesT5_buf = allocBufWrapper(devHost, 1, queue); - - 
alpaka::memcpy(queue, nTrackCandidatesT5_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); - alpaka::wait(queue); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); - int nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCandidatesT5_buf); + alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); - return nTrackCandidatesT5; + return *nTrackCandidatesT5_buf_h.data(); } lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { - auto nHits_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nHits_buf, hitsBuffers->nHits_buf); - alpaka::wait(queue); + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nHits = *alpaka::getPtrNative(nHits_buf); + auto const nHits = *nHits_buf_h.data(); hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); - *alpaka::getPtrNative(hitsInCPU->nHits_buf) = nHits; + *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->detid_buf, hitsBuffers->detid_buf, nHits); alpaka::memcpy(queue, hitsInCPU->xs_buf, hitsBuffers->xs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); - alpaka::wait(queue); } return hitsInCPU; } lst::HitsBuffer* lst::Event::getHitsInCMSSW() { if (hitsInCPU == nullptr) { - auto nHits_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nHits_buf, hitsBuffers->nHits_buf); - alpaka::wait(queue); + auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, 
nHits_buf_h, hitsBuffers->nHits_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nHits = *alpaka::getPtrNative(nHits_buf); + auto const nHits = *nHits_buf_h.data(); hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); - *alpaka::getPtrNative(hitsInCPU->nHits_buf) = nHits; + *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); - alpaka::wait(queue); } return hitsInCPU; } @@ -1538,7 +1487,7 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); - alpaka::wait(queue); + alpaka::wait(queue); // wait to get completed host data } return rangesInCPU; } @@ -1546,21 +1495,20 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, miniDoubletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, miniDoubletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); mdsInCPU = new lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); - *alpaka::getPtrNative(mdsInCPU->nMemoryLocations_buf) = nMemHost; + *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, 
mdsInCPU->anchorHitIndices_buf, miniDoubletsBuffers->anchorHitIndices_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->outerHitIndices_buf, miniDoubletsBuffers->outerHitIndices_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); - alpaka::wait(queue); } return mdsInCPU; } @@ -1568,16 +1516,16 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { lst::SegmentsBuffer* lst::Event::getSegments() { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, segmentsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, segmentsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); segmentsInCPU = new lst::SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); - *alpaka::getPtrNative(segmentsInCPU->nMemoryLocations_buf) = nMemHost; + *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, segmentsInCPU->nSegments_buf, segmentsBuffers->nSegments_buf); alpaka::memcpy(queue, segmentsInCPU->mdIndices_buf, segmentsBuffers->mdIndices_buf, 2u * nMemHost); alpaka::memcpy(queue, @@ -1596,7 +1544,6 @@ lst::SegmentsBuffer* lst::Event::getSegments() { alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); alpaka::memcpy(queue, segmentsInCPU->score_buf, 
segmentsBuffers->score_buf); - alpaka::wait(queue); } return segmentsInCPU; } @@ -1604,15 +1551,15 @@ lst::SegmentsBuffer* lst::Event::getSegments() { lst::TripletsBuffer* lst::Event::getTriplets() { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, tripletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, tripletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); tripletsInCPU = new lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); - *alpaka::getPtrNative(tripletsInCPU->nMemoryLocations_buf) = nMemHost; + *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; #ifdef CUT_VALUE_DEBUG alpaka::memcpy(queue, tripletsInCPU->zOut_buf, tripletsBuffers->zOut_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->zLo_buf, tripletsBuffers->zLo_buf, nMemHost); @@ -1632,7 +1579,6 @@ lst::TripletsBuffer* lst::Event::getTriplets() { alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); - alpaka::wait(queue); } return tripletsInCPU; } @@ -1640,15 +1586,15 @@ lst::TripletsBuffer* lst::Event::getTriplets() { lst::QuintupletsBuffer* lst::Event::getQuintuplets() { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU - auto nMemHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nMemHost_buf, 
quintupletsBuffers->nMemoryLocations_buf); - alpaka::wait(queue); + auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nMemHost_buf_h, quintupletsBuffers->nMemoryLocations_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nMemHost = *alpaka::getPtrNative(nMemHost_buf); + auto const nMemHost = *nMemHost_buf_h.data(); quintupletsInCPU = new lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); - *alpaka::getPtrNative(quintupletsInCPU->nMemoryLocations_buf) = nMemHost; + *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; alpaka::memcpy(queue, quintupletsInCPU->nQuintuplets_buf, quintupletsBuffers->nQuintuplets_buf); alpaka::memcpy( queue, quintupletsInCPU->totOccupancyQuintuplets_buf, quintupletsBuffers->totOccupancyQuintuplets_buf); @@ -1668,7 +1614,6 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); alpaka::memcpy( queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); - alpaka::wait(queue); } return quintupletsInCPU; } @@ -1676,15 +1621,15 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); - alpaka::wait(queue); + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nPixelTriplets = *alpaka::getPtrNative(nPixelTriplets_buf); + auto const nPixelTriplets = 
*nPixelTriplets_buf_h.data(); pixelTripletsInCPU = new lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); - *alpaka::getPtrNative(pixelTripletsInCPU->nPixelTriplets_buf) = nPixelTriplets; + *pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; alpaka::memcpy( queue, pixelTripletsInCPU->totOccupancyPixelTriplets_buf, pixelTripletsBuffers->totOccupancyPixelTriplets_buf); alpaka::memcpy(queue, pixelTripletsInCPU->rzChiSquared_buf, pixelTripletsBuffers->rzChiSquared_buf, nPixelTriplets); @@ -1707,7 +1652,6 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); - alpaka::wait(queue); } return pixelTripletsInCPU; } @@ -1715,15 +1659,15 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); - alpaka::wait(queue); + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nPixelQuintuplets = *alpaka::getPtrNative(nPixelQuintuplets_buf); + auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); pixelQuintupletsInCPU = new lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); - 
*alpaka::getPtrNative(pixelQuintupletsInCPU->nPixelQuintuplets_buf) = nPixelQuintuplets; + *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; alpaka::memcpy(queue, pixelQuintupletsInCPU->totOccupancyPixelQuintuplets_buf, pixelQuintupletsBuffers->totOccupancyPixelQuintuplets_buf); @@ -1743,7 +1687,6 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); - alpaka::wait(queue); } return pixelQuintupletsInCPU; } @@ -1751,16 +1694,16 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanHost_buf, trackCandidatesBuffers->nTrackCandidates_buf); + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); alpaka::wait(queue); - unsigned int nTrackCanHost = *alpaka::getPtrNative(nTrackCanHost_buf); + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); trackCandidatesInCPU = new lst::TrackCandidatesBuffer( n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); - *alpaka::getPtrNative(trackCandidatesInCPU->nTrackCandidates_buf) = nTrackCanHost; + *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, @@ -1781,7 +1724,6 @@ 
lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); - alpaka::wait(queue); } return trackCandidatesInCPU; } @@ -1789,16 +1731,16 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW() { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU - auto nTrackCanHost_buf = allocBufWrapper(devHost, 1, queue); - alpaka::memcpy(queue, nTrackCanHost_buf, trackCandidatesBuffers->nTrackCandidates_buf); - alpaka::wait(queue); + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); + alpaka::memcpy(queue, nTrackCanHost_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); + alpaka::wait(queue); // wait for the value before using - unsigned int nTrackCanHost = *alpaka::getPtrNative(nTrackCanHost_buf); + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); trackCandidatesInCPU = new lst::TrackCandidatesBuffer( n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); - *alpaka::getPtrNative(trackCandidatesInCPU->nTrackCandidates_buf) = nTrackCanHost; + *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, @@ -1809,7 +1751,6 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); - alpaka::wait(queue); } return trackCandidatesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index f1fa3a7d23347..7e2a351a8b699 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -44,9 +44,9 @@ namespace lst { 
std::array n_trackCandidates_by_layer_endcap_; std::array n_quintuplets_by_layer_barrel_; std::array n_quintuplets_by_layer_endcap_; + unsigned int nTotalSegments_; //Device stuff - unsigned int nTotalSegments; ObjectRanges* rangesInGPU; ObjectRangesBuffer* rangesBuffers; Hits* hitsInGPU; From 4ef678d1c691665c37506f39c2274afb0bfef00a Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 06:21:53 -0700 Subject: [PATCH 07/20] switch to using alpaka::exec --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 478 +++++++++----------- 1 file changed, 215 insertions(+), 263 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 05b9faac480e7..82a7f44a268b9 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -190,32 +190,26 @@ void lst::Event::addHitToEvent(std::vector const& x, WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); hitLoopKernel hit_loop_kernel; - auto const hit_loop_task(alpaka::createTaskKernel(hit_loop_workdiv, - hit_loop_kernel, - Endcap, - TwoS, - nModules_, - nEndCapMap_, - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), - *modulesBuffers_.data(), - *hitsInGPU, - nHits)); - - alpaka::enqueue(queue, hit_loop_task); + alpaka::exec(queue, + hit_loop_workdiv, + hit_loop_kernel, + Endcap, + TwoS, + nModules_, + nEndCapMap_, + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), + alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), + *modulesBuffers_.data(), + *hitsInGPU, + nHits); Vec3D const threadsPerBlock2{1, 1, 256}; Vec3D const blocksPerGrid2{1, 1, max_blocks}; WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); moduleRangesKernel module_ranges_kernel; - auto const module_ranges_task(alpaka::createTaskKernel( - 
module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_)); - - // Waiting isn't needed after second kernel call. Saves ~100 us. - // This is because addPixelSegmentToEvent (which is run next) doesn't rely on hitsBuffers->hitrange variables. - // Also, modulesInGPU->partnerModuleIndices is not alterned in addPixelSegmentToEvent. - alpaka::enqueue(queue, module_ranges_task); + alpaka::exec( + queue, module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } void lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, @@ -267,10 +261,8 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - auto const createMDArrayRangesGPUTask(alpaka::createTaskKernel( - createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); - - alpaka::enqueue(queue, createMDArrayRangesGPUTask); + alpaka::exec( + queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); @@ -295,13 +287,12 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; - auto const createSegmentArrayRangesTask(alpaka::createTaskKernel(createSegmentArrayRanges_workDiv, - createSegmentArrayRanges_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *mdsInGPU)); - - alpaka::enqueue(queue, createSegmentArrayRangesTask); + alpaka::exec(queue, + createSegmentArrayRanges_workDiv, + createSegmentArrayRanges_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *mdsInGPU); auto nTotalSegments_view 
= alpaka::createView(devHost, &nTotalSegments_, (Idx)1u); @@ -369,22 +360,21 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); addPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; - auto const addPixelSegmentToEvent_task(alpaka::createTaskKernel(addPixelSegmentToEvent_workdiv, - addPixelSegmentToEvent_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *hitsInGPU, - *mdsInGPU, - *segmentsInGPU, - alpaka::getPtrNative(hitIndices0_dev), - alpaka::getPtrNative(hitIndices1_dev), - alpaka::getPtrNative(hitIndices2_dev), - alpaka::getPtrNative(hitIndices3_dev), - alpaka::getPtrNative(dPhiChange_dev), - pixelModuleIndex, - size)); - - alpaka::enqueue(queue, addPixelSegmentToEvent_task); + alpaka::exec(queue, + addPixelSegmentToEvent_workdiv, + addPixelSegmentToEvent_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *hitsInGPU, + *mdsInGPU, + *segmentsInGPU, + alpaka::getPtrNative(hitIndices0_dev), + alpaka::getPtrNative(hitIndices1_dev), + alpaka::getPtrNative(hitIndices2_dev), + alpaka::getPtrNative(hitIndices3_dev), + alpaka::getPtrNative(dPhiChange_dev), + pixelModuleIndex, + size); } void lst::Event::createMiniDoublets() { @@ -404,10 +394,8 @@ void lst::Event::createMiniDoublets() { createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - auto const createMDArrayRangesGPUTask(alpaka::createTaskKernel( - createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU)); - - alpaka::enqueue(queue, createMDArrayRangesGPUTask); + alpaka::exec( + queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); @@ -428,14 
+416,13 @@ void lst::Event::createMiniDoublets() { createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); lst::createMiniDoubletsInGPUv2 createMiniDoubletsInGPUv2_kernel; - auto const createMiniDoubletsInGPUv2Task(alpaka::createTaskKernel(createMiniDoubletsInGPUv2_workDiv, - createMiniDoubletsInGPUv2_kernel, - *modulesBuffers_.data(), - *hitsInGPU, - *mdsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, createMiniDoubletsInGPUv2Task); + alpaka::exec(queue, + createMiniDoubletsInGPUv2_workDiv, + createMiniDoubletsInGPUv2_kernel, + *modulesBuffers_.data(), + *hitsInGPU, + *mdsInGPU, + *rangesInGPU); Vec3D const threadsPerBlockAddMD{1, 1, 1024}; Vec3D const blocksPerGridAddMD{1, 1, 1}; @@ -443,15 +430,13 @@ void lst::Event::createMiniDoublets() { createWorkDiv(blocksPerGridAddMD, threadsPerBlockAddMD, elementsPerThread); lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; - auto const addMiniDoubletRangesToEventExplicitTask( - alpaka::createTaskKernel(addMiniDoubletRangesToEventExplicit_workDiv, - addMiniDoubletRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *rangesInGPU, - *hitsInGPU)); - - alpaka::enqueue(queue, addMiniDoubletRangesToEventExplicitTask); + alpaka::exec(queue, + addMiniDoubletRangesToEventExplicit_workDiv, + addMiniDoubletRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *rangesInGPU, + *hitsInGPU); if (addObjects) { addMiniDoubletsToEventExplicit(); @@ -472,14 +457,13 @@ void lst::Event::createSegmentsWithModuleMap() { createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); lst::createSegmentsInGPUv2 createSegmentsInGPUv2_kernel; - auto const createSegmentsInGPUv2Task(alpaka::createTaskKernel(createSegmentsInGPUv2_workDiv, - createSegmentsInGPUv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, createSegmentsInGPUv2Task); + alpaka::exec(queue, + 
createSegmentsInGPUv2_workDiv, + createSegmentsInGPUv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *rangesInGPU); Vec3D const threadsPerBlockAddSeg{1, 1, 1024}; Vec3D const blocksPerGridAddSeg{1, 1, 1}; @@ -487,14 +471,12 @@ void lst::Event::createSegmentsWithModuleMap() { createWorkDiv(blocksPerGridAddSeg, threadsPerBlockAddSeg, elementsPerThread); lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; - auto const addSegmentRangesToEventExplicitTask( - alpaka::createTaskKernel(addSegmentRangesToEventExplicit_workDiv, - addSegmentRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addSegmentRangesToEventExplicitTask); + alpaka::exec(queue, + addSegmentRangesToEventExplicit_workDiv, + addSegmentRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *segmentsInGPU, + *rangesInGPU); if (addObjects) { addSegmentsToEventExplicit(); @@ -509,13 +491,12 @@ void lst::Event::createTriplets() { createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); lst::createTripletArrayRanges createTripletArrayRanges_kernel; - auto const createTripletArrayRangesTask(alpaka::createTaskKernel(createTripletArrayRanges_workDiv, - createTripletArrayRanges_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *segmentsInGPU)); - - alpaka::enqueue(queue, createTripletArrayRangesTask); + alpaka::exec(queue, + createTripletArrayRanges_workDiv, + createTripletArrayRanges_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *segmentsInGPU); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? 
auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); @@ -571,17 +552,16 @@ void lst::Event::createTriplets() { createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); lst::createTripletsInGPUv2 createTripletsInGPUv2_kernel; - auto const createTripletsInGPUv2Task(alpaka::createTaskKernel(createTripletsInGPUv2_workDiv, - createTripletsInGPUv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *rangesInGPU, - alpaka::getPtrNative(index_gpu_buf), - nonZeroModules)); - - alpaka::enqueue(queue, createTripletsInGPUv2Task); + alpaka::exec(queue, + createTripletsInGPUv2_workDiv, + createTripletsInGPUv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *rangesInGPU, + alpaka::getPtrNative(index_gpu_buf), + nonZeroModules); Vec3D const threadsPerBlockAddTrip{1, 1, 1024}; Vec3D const blocksPerGridAddTrip{1, 1, 1}; @@ -589,14 +569,12 @@ void lst::Event::createTriplets() { createWorkDiv(blocksPerGridAddTrip, threadsPerBlockAddTrip, elementsPerThread); lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; - auto const addTripletRangesToEventExplicitTask( - alpaka::createTaskKernel(addTripletRangesToEventExplicit_workDiv, - addTripletRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *tripletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addTripletRangesToEventExplicitTask); + alpaka::exec(queue, + addTripletRangesToEventExplicit_workDiv, + addTripletRangesToEventExplicit_kernel, + *modulesBuffers_.data(), + *tripletsInGPU, + *rangesInGPU); if (addObjects) { addTripletsToEventExplicit(); @@ -617,15 +595,14 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); lst::crossCleanpT3 crossCleanpT3_kernel; - auto const crossCleanpT3Task(alpaka::createTaskKernel(crossCleanpT3_workDiv, - crossCleanpT3_kernel, - 
*modulesBuffers_.data(), - *rangesInGPU, - *pixelTripletsInGPU, - *segmentsInGPU, - *pixelQuintupletsInGPU)); - - alpaka::enqueue(queue, crossCleanpT3Task); + alpaka::exec(queue, + crossCleanpT3_workDiv, + crossCleanpT3_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *pixelTripletsInGPU, + *segmentsInGPU, + *pixelQuintupletsInGPU); Vec3D const threadsPerBlock_addpT3asTrackCandidatesInGPU{1, 1, 512}; Vec3D const blocksPerGrid_addpT3asTrackCandidatesInGPU{1, 1, 1}; @@ -633,15 +610,14 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ blocksPerGrid_addpT3asTrackCandidatesInGPU, threadsPerBlock_addpT3asTrackCandidatesInGPU, elementsPerThread); lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; - auto const addpT3asTrackCandidatesInGPUTask(alpaka::createTaskKernel(addpT3asTrackCandidatesInGPU_workDiv, - addpT3asTrackCandidatesInGPU_kernel, - nLowerModules_, - *pixelTripletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addpT3asTrackCandidatesInGPUTask); + alpaka::exec(queue, + addpT3asTrackCandidatesInGPU_workDiv, + addpT3asTrackCandidatesInGPU_kernel, + nLowerModules_, + *pixelTripletsInGPU, + *trackCandidatesInGPU, + *segmentsInGPU, + *rangesInGPU); // Pull nEligibleT5Modules from the device. 
auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -655,13 +631,11 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); lst::removeDupQuintupletsInGPUBeforeTC removeDupQuintupletsInGPUBeforeTC_kernel; - auto const removeDupQuintupletsInGPUBeforeTCTask( - alpaka::createTaskKernel(removeDupQuintupletsInGPUBeforeTC_workDiv, - removeDupQuintupletsInGPUBeforeTC_kernel, - *quintupletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, removeDupQuintupletsInGPUBeforeTCTask); + alpaka::exec(queue, + removeDupQuintupletsInGPUBeforeTC_workDiv, + removeDupQuintupletsInGPUBeforeTC_kernel, + *quintupletsInGPU, + *rangesInGPU); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; @@ -669,15 +643,14 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); lst::crossCleanT5 crossCleanT5_kernel; - auto const crossCleanT5Task(alpaka::createTaskKernel(crossCleanT5_workDiv, - crossCleanT5_kernel, - *modulesBuffers_.data(), - *quintupletsInGPU, - *pixelQuintupletsInGPU, - *pixelTripletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, crossCleanT5Task); + alpaka::exec(queue, + crossCleanT5_workDiv, + crossCleanT5_kernel, + *modulesBuffers_.data(), + *quintupletsInGPU, + *pixelQuintupletsInGPU, + *pixelTripletsInGPU, + *rangesInGPU); Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; @@ -685,14 +658,13 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); lst::addT5asTrackCandidateInGPU addT5asTrackCandidateInGPU_kernel; - auto const 
addT5asTrackCandidateInGPUTask(alpaka::createTaskKernel(addT5asTrackCandidateInGPU_workDiv, - addT5asTrackCandidateInGPU_kernel, - nLowerModules_, - *quintupletsInGPU, - *trackCandidatesInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addT5asTrackCandidateInGPUTask); + alpaka::exec(queue, + addT5asTrackCandidateInGPU_workDiv, + addT5asTrackCandidateInGPU_kernel, + nLowerModules_, + *quintupletsInGPU, + *trackCandidatesInGPU, + *rangesInGPU); if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; @@ -701,10 +673,8 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); lst::checkHitspLS checkHitspLS_kernel; - auto const checkHitspLSTask(alpaka::createTaskKernel( - checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true)); - - alpaka::enqueue(queue, checkHitspLSTask); + alpaka::exec( + queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; @@ -713,18 +683,17 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); lst::crossCleanpLS crossCleanpLS_kernel; - auto const crossCleanpLSTask(alpaka::createTaskKernel(crossCleanpLS_workDiv, - crossCleanpLS_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *pixelTripletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *mdsInGPU, - *hitsInGPU, - *quintupletsInGPU)); - - alpaka::enqueue(queue, crossCleanpLSTask); + alpaka::exec(queue, + crossCleanpLS_workDiv, + crossCleanpLS_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *pixelTripletsInGPU, + *trackCandidatesInGPU, + *segmentsInGPU, + *mdsInGPU, + *hitsInGPU, + *quintupletsInGPU); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; Vec3D const 
blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; @@ -732,14 +701,13 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); lst::addpLSasTrackCandidateInGPU addpLSasTrackCandidateInGPU_kernel; - auto const addpLSasTrackCandidateInGPUTask(alpaka::createTaskKernel(addpLSasTrackCandidateInGPU_workDiv, - addpLSasTrackCandidateInGPU_kernel, - nLowerModules_, - *trackCandidatesInGPU, - *segmentsInGPU, - tc_pls_triplets)); - - alpaka::enqueue(queue, addpLSasTrackCandidateInGPUTask); + alpaka::exec(queue, + addpLSasTrackCandidateInGPU_workDiv, + addpLSasTrackCandidateInGPU_kernel, + nLowerModules_, + *trackCandidatesInGPU, + *segmentsInGPU, + tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); @@ -844,20 +812,18 @@ void lst::Event::createPixelTriplets() { createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); lst::createPixelTripletsInGPUFromMapv2 createPixelTripletsInGPUFromMapv2_kernel; - auto const createPixelTripletsInGPUFromMapv2Task( - alpaka::createTaskKernel(createPixelTripletsInGPUFromMapv2_workDiv, - createPixelTripletsInGPUFromMapv2_kernel, - *modulesBuffers_.data(), - *rangesInGPU, - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *pixelTripletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), - nInnerSegments)); - - alpaka::enqueue(queue, createPixelTripletsInGPUFromMapv2Task); + alpaka::exec(queue, + createPixelTripletsInGPUFromMapv2_workDiv, + createPixelTripletsInGPUFromMapv2_kernel, + *modulesBuffers_.data(), + *rangesInGPU, + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *pixelTripletsInGPU, + alpaka::getPtrNative(connectedPixelSize_dev_buf), + alpaka::getPtrNative(connectedPixelIndex_dev_buf), + nInnerSegments); 
#ifdef WARNINGS auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); @@ -876,10 +842,10 @@ void lst::Event::createPixelTriplets() { createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); lst::removeDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel; - auto const removeDupPixelTripletsInGPUFromMapTask(alpaka::createTaskKernel( - removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, *pixelTripletsInGPU)); - - alpaka::enqueue(queue, removeDupPixelTripletsInGPUFromMapTask); + alpaka::exec(queue, + removeDupPixelTripletsInGPUFromMap_workDiv, + removeDupPixelTripletsInGPUFromMap_kernel, + *pixelTripletsInGPU); } void lst::Event::createQuintuplets() { @@ -889,14 +855,12 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridCreateQuints, threadsPerBlockCreateQuints, elementsPerThread); lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; - auto const createEligibleModulesListForQuintupletsGPUTask( - alpaka::createTaskKernel(createEligibleModulesListForQuintupletsGPU_workDiv, - createEligibleModulesListForQuintupletsGPU_kernel, - *modulesBuffers_.data(), - *tripletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, createEligibleModulesListForQuintupletsGPUTask); + alpaka::exec(queue, + createEligibleModulesListForQuintupletsGPU_workDiv, + createEligibleModulesListForQuintupletsGPU_kernel, + *modulesBuffers_.data(), + *tripletsInGPU, + *rangesInGPU); auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); @@ -922,17 +886,16 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); lst::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; - auto const createQuintupletsInGPUv2Task(alpaka::createTaskKernel(createQuintupletsInGPUv2_workDiv, - createQuintupletsInGPUv2_kernel, - 
*modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *quintupletsInGPU, - *rangesInGPU, - nEligibleT5Modules)); - - alpaka::enqueue(queue, createQuintupletsInGPUv2Task); + alpaka::exec(queue, + createQuintupletsInGPUv2_workDiv, + createQuintupletsInGPUv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *quintupletsInGPU, + *rangesInGPU, + nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; @@ -940,14 +903,12 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); lst::removeDupQuintupletsInGPUAfterBuild removeDupQuintupletsInGPUAfterBuild_kernel; - auto const removeDupQuintupletsInGPUAfterBuildTask( - alpaka::createTaskKernel(removeDupQuintupletsInGPUAfterBuild_workDiv, - removeDupQuintupletsInGPUAfterBuild_kernel, - *modulesBuffers_.data(), - *quintupletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, removeDupQuintupletsInGPUAfterBuildTask); + alpaka::exec(queue, + removeDupQuintupletsInGPUAfterBuild_workDiv, + removeDupQuintupletsInGPUAfterBuild_kernel, + *modulesBuffers_.data(), + *quintupletsInGPU, + *rangesInGPU); Vec3D const threadsPerBlockAddQuint{1, 1, 1024}; Vec3D const blocksPerGridAddQuint{1, 1, 1}; @@ -955,14 +916,12 @@ void lst::Event::createQuintuplets() { createWorkDiv(blocksPerGridAddQuint, threadsPerBlockAddQuint, elementsPerThread); lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; - auto const addQuintupletRangesToEventExplicitTask( - alpaka::createTaskKernel(addQuintupletRangesToEventExplicit_workDiv, - addQuintupletRangesToEventExplicit_kernel, - *modulesBuffers_.data(), - *quintupletsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addQuintupletRangesToEventExplicitTask); + alpaka::exec(queue, + addQuintupletRangesToEventExplicit_workDiv, + addQuintupletRangesToEventExplicit_kernel, + *modulesBuffers_.data(), 
+ *quintupletsInGPU, + *rangesInGPU); if (addObjects) { addQuintupletsToEventExplicit(); @@ -977,10 +936,8 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); lst::checkHitspLS checkHitspLS_kernel; - auto const checkHitspLSTask(alpaka::createTaskKernel( - checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false)); - - alpaka::enqueue(queue, checkHitspLSTask); + alpaka::exec( + queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false); } } @@ -1062,21 +1019,19 @@ void lst::Event::createPixelQuintuplets() { createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); lst::createPixelQuintupletsInGPUFromMapv2 createPixelQuintupletsInGPUFromMapv2_kernel; - auto const createPixelQuintupletsInGPUFromMapv2Task( - alpaka::createTaskKernel(createPixelQuintupletsInGPUFromMapv2_workDiv, - createPixelQuintupletsInGPUFromMapv2_kernel, - *modulesBuffers_.data(), - *mdsInGPU, - *segmentsInGPU, - *tripletsInGPU, - *quintupletsInGPU, - *pixelQuintupletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), - nInnerSegments, - *rangesInGPU)); - - alpaka::enqueue(queue, createPixelQuintupletsInGPUFromMapv2Task); + alpaka::exec(queue, + createPixelQuintupletsInGPUFromMapv2_workDiv, + createPixelQuintupletsInGPUFromMapv2_kernel, + *modulesBuffers_.data(), + *mdsInGPU, + *segmentsInGPU, + *tripletsInGPU, + *quintupletsInGPU, + *pixelQuintupletsInGPU, + alpaka::getPtrNative(connectedPixelSize_dev_buf), + alpaka::getPtrNative(connectedPixelIndex_dev_buf), + nInnerSegments, + *rangesInGPU); Vec3D const threadsPerBlockDupPix{1, 16, 16}; Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; @@ -1084,12 +1039,10 @@ void lst::Event::createPixelQuintuplets() { createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, 
elementsPerThread); lst::removeDupPixelQuintupletsInGPUFromMap removeDupPixelQuintupletsInGPUFromMap_kernel; - auto const removeDupPixelQuintupletsInGPUFromMapTask( - alpaka::createTaskKernel(removeDupPixelQuintupletsInGPUFromMap_workDiv, - removeDupPixelQuintupletsInGPUFromMap_kernel, - *pixelQuintupletsInGPU)); - - alpaka::enqueue(queue, removeDupPixelQuintupletsInGPUFromMapTask); + alpaka::exec(queue, + removeDupPixelQuintupletsInGPUFromMap_workDiv, + removeDupPixelQuintupletsInGPUFromMap_kernel, + *pixelQuintupletsInGPU); Vec3D const threadsPerBlockAddpT5asTrackCan{1, 1, 256}; Vec3D const blocksPerGridAddpT5asTrackCan{1, 1, 1}; @@ -1097,15 +1050,14 @@ void lst::Event::createPixelQuintuplets() { createWorkDiv(blocksPerGridAddpT5asTrackCan, threadsPerBlockAddpT5asTrackCan, elementsPerThread); lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; - auto const addpT5asTrackCandidateInGPUTask(alpaka::createTaskKernel(addpT5asTrackCandidateInGPU_workDiv, - addpT5asTrackCandidateInGPU_kernel, - nLowerModules_, - *pixelQuintupletsInGPU, - *trackCandidatesInGPU, - *segmentsInGPU, - *rangesInGPU)); - - alpaka::enqueue(queue, addpT5asTrackCandidateInGPUTask); + alpaka::exec(queue, + addpT5asTrackCandidateInGPU_workDiv, + addpT5asTrackCandidateInGPU_kernel, + nLowerModules_, + *pixelQuintupletsInGPU, + *trackCandidatesInGPU, + *segmentsInGPU, + *rangesInGPU); #ifdef WARNINGS auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); From a7d73c09a9c7a7ecbf25c45d56f70ebcadddb285 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 06:37:00 -0700 Subject: [PATCH 08/20] replace alpaka::getPtrNative(buf) with buf.data() --- .../LSTCore/interface/EndcapGeometryBuffer.h | 4 +- RecoTracker/LSTCore/interface/Module.h | 52 +++++++------- RecoTracker/LSTCore/src/LSTESData.cc | 4 +- RecoTracker/LSTCore/src/ModuleMethods.h | 50 ++++++------- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 40 +++++------ RecoTracker/LSTCore/src/alpaka/Hit.h | 38 
+++++----- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 72 +++++++++---------- RecoTracker/LSTCore/src/alpaka/ObjectRanges.h | 56 +++++++-------- .../LSTCore/src/alpaka/PixelQuintuplet.h | 36 +++++----- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 44 ++++++------ RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 48 ++++++------- RecoTracker/LSTCore/src/alpaka/Segment.h | 68 +++++++++--------- .../LSTCore/src/alpaka/TrackCandidate.h | 34 ++++----- RecoTracker/LSTCore/src/alpaka/Triplet.h | 34 ++++----- 14 files changed, 290 insertions(+), 290 deletions(-) diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h index 6a787a5ed95eb..2c6df9ab2773c 100644 --- a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h +++ b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h @@ -19,8 +19,8 @@ namespace lst { template void setData(TBuff const& buf) { - geoMapDetId = alpaka::getPtrNative(buf.geoMapDetId_buf); - geoMapPhi = alpaka::getPtrNative(buf.geoMapPhi_buf); + geoMapDetId = buf.geoMapDetId_buf.data(); + geoMapPhi = buf.geoMapPhi_buf.data(); } }; diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h index eca086b91850f..7266ebd7bc49b 100644 --- a/RecoTracker/LSTCore/interface/Module.h +++ b/RecoTracker/LSTCore/interface/Module.h @@ -84,32 +84,32 @@ namespace lst { template void setData(TBuff const& buf) { - detIds = alpaka::getPtrNative(buf.detIds_buf); - moduleMap = alpaka::getPtrNative(buf.moduleMap_buf); - mapdetId = alpaka::getPtrNative(buf.mapdetId_buf); - mapIdx = alpaka::getPtrNative(buf.mapIdx_buf); - nConnectedModules = alpaka::getPtrNative(buf.nConnectedModules_buf); - drdzs = alpaka::getPtrNative(buf.drdzs_buf); - dxdys = alpaka::getPtrNative(buf.dxdys_buf); - nModules = alpaka::getPtrNative(buf.nModules_buf); - nLowerModules = alpaka::getPtrNative(buf.nLowerModules_buf); - partnerModuleIndices = alpaka::getPtrNative(buf.partnerModuleIndices_buf); - - layers 
= alpaka::getPtrNative(buf.layers_buf); - rings = alpaka::getPtrNative(buf.rings_buf); - modules = alpaka::getPtrNative(buf.modules_buf); - rods = alpaka::getPtrNative(buf.rods_buf); - subdets = alpaka::getPtrNative(buf.subdets_buf); - sides = alpaka::getPtrNative(buf.sides_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - r = alpaka::getPtrNative(buf.r_buf); - isInverted = alpaka::getPtrNative(buf.isInverted_buf); - isLower = alpaka::getPtrNative(buf.isLower_buf); - isAnchor = alpaka::getPtrNative(buf.isAnchor_buf); - moduleType = alpaka::getPtrNative(buf.moduleType_buf); - moduleLayerType = alpaka::getPtrNative(buf.moduleLayerType_buf); - lstLayers = alpaka::getPtrNative(buf.lstLayers_buf); - connectedPixels = alpaka::getPtrNative(buf.connectedPixels_buf); + detIds = buf.detIds_buf.data(); + moduleMap = buf.moduleMap_buf.data(); + mapdetId = buf.mapdetId_buf.data(); + mapIdx = buf.mapIdx_buf.data(); + nConnectedModules = buf.nConnectedModules_buf.data(); + drdzs = buf.drdzs_buf.data(); + dxdys = buf.dxdys_buf.data(); + nModules = buf.nModules_buf.data(); + nLowerModules = buf.nLowerModules_buf.data(); + partnerModuleIndices = buf.partnerModuleIndices_buf.data(); + + layers = buf.layers_buf.data(); + rings = buf.rings_buf.data(); + modules = buf.modules_buf.data(); + rods = buf.rods_buf.data(); + subdets = buf.subdets_buf.data(); + sides = buf.sides_buf.data(); + eta = buf.eta_buf.data(); + r = buf.r_buf.data(); + isInverted = buf.isInverted_buf.data(); + isLower = buf.isLower_buf.data(); + isAnchor = buf.isAnchor_buf.data(); + moduleType = buf.moduleType_buf.data(); + moduleLayerType = buf.moduleLayerType_buf.data(); + lstLayers = buf.lstLayers_buf.data(); + connectedPixels = buf.connectedPixels_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc index 9079d0d229216..1acf085a0f491 100644 --- a/RecoTracker/LSTCore/src/LSTESData.cc +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -89,10 +89,10 @@ std::unique_ptr> 
lst::loadAndFillESHost() auto endcapGeometryBuffers = EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); - std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapDetId_buf), + std::memcpy(endcapGeometryBuffers.geoMapDetId_buf.data(), endcapGeometry.geoMapDetId_buf.data(), endcapGeometry.nEndCapMap * sizeof(unsigned int)); - std::memcpy(alpaka::getPtrNative(endcapGeometryBuffers.geoMapPhi_buf), + std::memcpy(endcapGeometryBuffers.geoMapPhi_buf.data(), endcapGeometry.geoMapPhi_buf.data(), endcapGeometry.nEndCapMap * sizeof(float)); diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index 54514cccf2b54..196212defdfa6 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -83,7 +83,7 @@ namespace lst { modulesBuf.connectedPixels_buf = allocBufWrapper(cms::alpakatools::host(), nPixels); modulesBuf.data_.setData(modulesBuf); - unsigned int* connectedPixels = alpaka::getPtrNative(modulesBuf.connectedPixels_buf); + unsigned int* connectedPixels = modulesBuf.connectedPixels_buf.data(); for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); @@ -99,8 +99,8 @@ namespace lst { inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd, ModuleConnectionMap const& moduleConnectionMap) { - uint16_t* moduleMap = alpaka::getPtrNative(modulesBuf.moduleMap_buf); - uint16_t* nConnectedModules = alpaka::getPtrNative(modulesBuf.nConnectedModules_buf); + uint16_t* moduleMap = modulesBuf.moduleMap_buf.data(); + uint16_t* nConnectedModules = modulesBuf.nConnectedModules_buf.data(); for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { unsigned int detId = it->first; @@ -114,8 +114,8 @@ namespace lst { } inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd) { - uint16_t* mapIdx = 
alpaka::getPtrNative(modulesBuf.mapIdx_buf); - unsigned int* mapdetId = alpaka::getPtrNative(modulesBuf.mapdetId_buf); + uint16_t* mapIdx = modulesBuf.mapIdx_buf.data(); + unsigned int* mapdetId = modulesBuf.mapdetId_buf.data(); unsigned int counter = 0; for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { @@ -205,26 +205,26 @@ namespace lst { ModulesBuffer modulesBuf(cms::alpakatools::host(), nModules, 0); // Getting the underlying data pointers - unsigned int* host_detIds = alpaka::getPtrNative(modulesBuf.detIds_buf); - short* host_layers = alpaka::getPtrNative(modulesBuf.layers_buf); - short* host_rings = alpaka::getPtrNative(modulesBuf.rings_buf); - short* host_rods = alpaka::getPtrNative(modulesBuf.rods_buf); - short* host_modules = alpaka::getPtrNative(modulesBuf.modules_buf); - short* host_subdets = alpaka::getPtrNative(modulesBuf.subdets_buf); - short* host_sides = alpaka::getPtrNative(modulesBuf.sides_buf); - float* host_eta = alpaka::getPtrNative(modulesBuf.eta_buf); - float* host_r = alpaka::getPtrNative(modulesBuf.r_buf); - bool* host_isInverted = alpaka::getPtrNative(modulesBuf.isInverted_buf); - bool* host_isLower = alpaka::getPtrNative(modulesBuf.isLower_buf); - bool* host_isAnchor = alpaka::getPtrNative(modulesBuf.isAnchor_buf); - ModuleType* host_moduleType = alpaka::getPtrNative(modulesBuf.moduleType_buf); - ModuleLayerType* host_moduleLayerType = alpaka::getPtrNative(modulesBuf.moduleLayerType_buf); - float* host_dxdys = alpaka::getPtrNative(modulesBuf.dxdys_buf); - float* host_drdzs = alpaka::getPtrNative(modulesBuf.drdzs_buf); - uint16_t* host_nModules = alpaka::getPtrNative(modulesBuf.nModules_buf); - uint16_t* host_nLowerModules = alpaka::getPtrNative(modulesBuf.nLowerModules_buf); - uint16_t* host_partnerModuleIndices = alpaka::getPtrNative(modulesBuf.partnerModuleIndices_buf); - int* host_lstLayers = alpaka::getPtrNative(modulesBuf.lstLayers_buf); + unsigned int* host_detIds = modulesBuf.detIds_buf.data(); + short* 
host_layers = modulesBuf.layers_buf.data(); + short* host_rings = modulesBuf.rings_buf.data(); + short* host_rods = modulesBuf.rods_buf.data(); + short* host_modules = modulesBuf.modules_buf.data(); + short* host_subdets = modulesBuf.subdets_buf.data(); + short* host_sides = modulesBuf.sides_buf.data(); + float* host_eta = modulesBuf.eta_buf.data(); + float* host_r = modulesBuf.r_buf.data(); + bool* host_isInverted = modulesBuf.isInverted_buf.data(); + bool* host_isLower = modulesBuf.isLower_buf.data(); + bool* host_isAnchor = modulesBuf.isAnchor_buf.data(); + ModuleType* host_moduleType = modulesBuf.moduleType_buf.data(); + ModuleLayerType* host_moduleLayerType = modulesBuf.moduleLayerType_buf.data(); + float* host_dxdys = modulesBuf.dxdys_buf.data(); + float* host_drdzs = modulesBuf.drdzs_buf.data(); + uint16_t* host_nModules = modulesBuf.nModules_buf.data(); + uint16_t* host_nLowerModules = modulesBuf.nLowerModules_buf.data(); + uint16_t* host_partnerModuleIndices = modulesBuf.partnerModuleIndices_buf.data(); + int* host_lstLayers = modulesBuf.lstLayers_buf.data(); //reassign detIdToIndex indices here nLowerModules = (nModules - 1) / 2; diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 82a7f44a268b9..9e46c96a4488c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -197,8 +197,8 @@ void lst::Event::addHitToEvent(std::vector const& x, TwoS, nModules_, nEndCapMap_, - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapDetId_buf), - alpaka::getPtrNative(endcapGeometryBuffers_.geoMapPhi_buf), + endcapGeometryBuffers_.geoMapDetId_buf.data(), + endcapGeometryBuffers_.geoMapPhi_buf.data(), *modulesBuffers_.data(), *hitsInGPU, nHits); @@ -368,11 +368,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& *hitsInGPU, *mdsInGPU, *segmentsInGPU, - alpaka::getPtrNative(hitIndices0_dev), - alpaka::getPtrNative(hitIndices1_dev), - 
alpaka::getPtrNative(hitIndices2_dev), - alpaka::getPtrNative(hitIndices3_dev), - alpaka::getPtrNative(dPhiChange_dev), + hitIndices0_dev.data(), + hitIndices1_dev.data(), + hitIndices2_dev.data(), + hitIndices3_dev.data(), + dPhiChange_dev.data(), pixelModuleIndex, size); } @@ -560,7 +560,7 @@ void lst::Event::createTriplets() { *segmentsInGPU, *tripletsInGPU, *rangesInGPU, - alpaka::getPtrNative(index_gpu_buf), + index_gpu_buf.data(), nonZeroModules); Vec3D const threadsPerBlockAddTrip{1, 1, 1024}; @@ -720,10 +720,10 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ alpaka::memcpy(queue, nTrackCanT5Host_buf, trackCandidatesBuffers->nTrackCandidatesT5_buf); alpaka::wait(queue); // wait to get the values before using them - auto nTrackCandidatespT5 = *alpaka::getPtrNative(nTrackCanpT5Host_buf); - auto nTrackCandidatespT3 = *alpaka::getPtrNative(nTrackCanpT3Host_buf); - auto nTrackCandidatespLS = *alpaka::getPtrNative(nTrackCanpLSHost_buf); - auto nTrackCandidatesT5 = *alpaka::getPtrNative(nTrackCanT5Host_buf); + auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); + auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); + auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); + auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { printf( @@ -764,8 +764,8 @@ void lst::Event::createPixelTriplets() { auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - unsigned int* connectedPixelSize_host = alpaka::getPtrNative(connectedPixelSize_host_buf); - unsigned int* connectedPixelIndex_host = alpaka::getPtrNative(connectedPixelIndex_host_buf); + unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + unsigned int* connectedPixelIndex_host = 
connectedPixelIndex_host_buf.data(); int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; @@ -821,8 +821,8 @@ void lst::Event::createPixelTriplets() { *segmentsInGPU, *tripletsInGPU, *pixelTripletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), nInnerSegments); #ifdef WARNINGS @@ -831,7 +831,7 @@ void lst::Event::createPixelTriplets() { alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); alpaka::wait(queue); // wait to get the value before using it - std::cout << "number of pixel triplets = " << *alpaka::getPtrNative(nPixelTriplets_buf) << std::endl; + std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; #endif //pT3s can be cleaned here because they're not used in making pT5s! @@ -1028,8 +1028,8 @@ void lst::Event::createPixelQuintuplets() { *tripletsInGPU, *quintupletsInGPU, *pixelQuintupletsInGPU, - alpaka::getPtrNative(connectedPixelSize_dev_buf), - alpaka::getPtrNative(connectedPixelIndex_dev_buf), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), nInnerSegments, *rangesInGPU); @@ -1065,7 +1065,7 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); alpaka::wait(queue); // wait to get the value before using it - std::cout << "number of pixel quintuplets = " << *alpaka::getPtrNative(nPixelQuintuplets_buf) << std::endl; + std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl; #endif } diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index 7f3412ce4694a..253b0860c7068 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -28,25 +28,25 @@ namespace 
lst { template void setData(TBuff& buf) { - nHits = alpaka::getPtrNative(buf.nHits_buf); - xs = alpaka::getPtrNative(buf.xs_buf); - ys = alpaka::getPtrNative(buf.ys_buf); - zs = alpaka::getPtrNative(buf.zs_buf); - moduleIndices = alpaka::getPtrNative(buf.moduleIndices_buf); - idxs = alpaka::getPtrNative(buf.idxs_buf); - detid = alpaka::getPtrNative(buf.detid_buf); - rts = alpaka::getPtrNative(buf.rts_buf); - phis = alpaka::getPtrNative(buf.phis_buf); - etas = alpaka::getPtrNative(buf.etas_buf); - highEdgeXs = alpaka::getPtrNative(buf.highEdgeXs_buf); - highEdgeYs = alpaka::getPtrNative(buf.highEdgeYs_buf); - lowEdgeXs = alpaka::getPtrNative(buf.lowEdgeXs_buf); - lowEdgeYs = alpaka::getPtrNative(buf.lowEdgeYs_buf); - hitRanges = alpaka::getPtrNative(buf.hitRanges_buf); - hitRangesLower = alpaka::getPtrNative(buf.hitRangesLower_buf); - hitRangesUpper = alpaka::getPtrNative(buf.hitRangesUpper_buf); - hitRangesnLower = alpaka::getPtrNative(buf.hitRangesnLower_buf); - hitRangesnUpper = alpaka::getPtrNative(buf.hitRangesnUpper_buf); + nHits = buf.nHits_buf.data(); + xs = buf.xs_buf.data(); + ys = buf.ys_buf.data(); + zs = buf.zs_buf.data(); + moduleIndices = buf.moduleIndices_buf.data(); + idxs = buf.idxs_buf.data(); + detid = buf.detid_buf.data(); + rts = buf.rts_buf.data(); + phis = buf.phis_buf.data(); + etas = buf.etas_buf.data(); + highEdgeXs = buf.highEdgeXs_buf.data(); + highEdgeYs = buf.highEdgeYs_buf.data(); + lowEdgeXs = buf.lowEdgeXs_buf.data(); + lowEdgeYs = buf.lowEdgeYs_buf.data(); + hitRanges = buf.hitRanges_buf.data(); + hitRangesLower = buf.hitRangesLower_buf.data(); + hitRangesUpper = buf.hitRangesUpper_buf.data(); + hitRangesnLower = buf.hitRangesnLower_buf.data(); + hitRangesnUpper = buf.hitRangesnUpper_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index bda334b31afc1..b4cbd500c7bf8 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ 
b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -56,42 +56,42 @@ namespace lst { template void setData(TBuf& buf) { - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - anchorHitIndices = alpaka::getPtrNative(buf.anchorHitIndices_buf); - outerHitIndices = alpaka::getPtrNative(buf.outerHitIndices_buf); - moduleIndices = alpaka::getPtrNative(buf.moduleIndices_buf); - nMDs = alpaka::getPtrNative(buf.nMDs_buf); - totOccupancyMDs = alpaka::getPtrNative(buf.totOccupancyMDs_buf); - dphichanges = alpaka::getPtrNative(buf.dphichanges_buf); - dzs = alpaka::getPtrNative(buf.dzs_buf); - dphis = alpaka::getPtrNative(buf.dphis_buf); - shiftedXs = alpaka::getPtrNative(buf.shiftedXs_buf); - shiftedYs = alpaka::getPtrNative(buf.shiftedYs_buf); - shiftedZs = alpaka::getPtrNative(buf.shiftedZs_buf); - noShiftedDphis = alpaka::getPtrNative(buf.noShiftedDphis_buf); - noShiftedDphiChanges = alpaka::getPtrNative(buf.noShiftedDphiChanges_buf); - anchorX = alpaka::getPtrNative(buf.anchorX_buf); - anchorY = alpaka::getPtrNative(buf.anchorY_buf); - anchorZ = alpaka::getPtrNative(buf.anchorZ_buf); - anchorRt = alpaka::getPtrNative(buf.anchorRt_buf); - anchorPhi = alpaka::getPtrNative(buf.anchorPhi_buf); - anchorEta = alpaka::getPtrNative(buf.anchorEta_buf); - anchorHighEdgeX = alpaka::getPtrNative(buf.anchorHighEdgeX_buf); - anchorHighEdgeY = alpaka::getPtrNative(buf.anchorHighEdgeY_buf); - anchorLowEdgeX = alpaka::getPtrNative(buf.anchorLowEdgeX_buf); - anchorLowEdgeY = alpaka::getPtrNative(buf.anchorLowEdgeY_buf); - outerX = alpaka::getPtrNative(buf.outerX_buf); - outerY = alpaka::getPtrNative(buf.outerY_buf); - outerZ = alpaka::getPtrNative(buf.outerZ_buf); - outerRt = alpaka::getPtrNative(buf.outerRt_buf); - outerPhi = alpaka::getPtrNative(buf.outerPhi_buf); - outerEta = alpaka::getPtrNative(buf.outerEta_buf); - outerHighEdgeX = alpaka::getPtrNative(buf.outerHighEdgeX_buf); - outerHighEdgeY = alpaka::getPtrNative(buf.outerHighEdgeY_buf); - outerLowEdgeX = 
alpaka::getPtrNative(buf.outerLowEdgeX_buf); - outerLowEdgeY = alpaka::getPtrNative(buf.outerLowEdgeY_buf); - anchorLowEdgePhi = alpaka::getPtrNative(buf.anchorLowEdgePhi_buf); - anchorHighEdgePhi = alpaka::getPtrNative(buf.anchorHighEdgePhi_buf); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + anchorHitIndices = buf.anchorHitIndices_buf.data(); + outerHitIndices = buf.outerHitIndices_buf.data(); + moduleIndices = buf.moduleIndices_buf.data(); + nMDs = buf.nMDs_buf.data(); + totOccupancyMDs = buf.totOccupancyMDs_buf.data(); + dphichanges = buf.dphichanges_buf.data(); + dzs = buf.dzs_buf.data(); + dphis = buf.dphis_buf.data(); + shiftedXs = buf.shiftedXs_buf.data(); + shiftedYs = buf.shiftedYs_buf.data(); + shiftedZs = buf.shiftedZs_buf.data(); + noShiftedDphis = buf.noShiftedDphis_buf.data(); + noShiftedDphiChanges = buf.noShiftedDphiChanges_buf.data(); + anchorX = buf.anchorX_buf.data(); + anchorY = buf.anchorY_buf.data(); + anchorZ = buf.anchorZ_buf.data(); + anchorRt = buf.anchorRt_buf.data(); + anchorPhi = buf.anchorPhi_buf.data(); + anchorEta = buf.anchorEta_buf.data(); + anchorHighEdgeX = buf.anchorHighEdgeX_buf.data(); + anchorHighEdgeY = buf.anchorHighEdgeY_buf.data(); + anchorLowEdgeX = buf.anchorLowEdgeX_buf.data(); + anchorLowEdgeY = buf.anchorLowEdgeY_buf.data(); + outerX = buf.outerX_buf.data(); + outerY = buf.outerY_buf.data(); + outerZ = buf.outerZ_buf.data(); + outerRt = buf.outerRt_buf.data(); + outerPhi = buf.outerPhi_buf.data(); + outerEta = buf.outerEta_buf.data(); + outerHighEdgeX = buf.outerHighEdgeX_buf.data(); + outerHighEdgeY = buf.outerHighEdgeY_buf.data(); + outerLowEdgeX = buf.outerLowEdgeX_buf.data(); + outerLowEdgeY = buf.outerLowEdgeY_buf.data(); + anchorLowEdgePhi = buf.anchorLowEdgePhi_buf.data(); + anchorHighEdgePhi = buf.anchorHighEdgePhi_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h index 09aac58bc8eb4..0e17185104c74 100644 --- 
a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -40,34 +40,34 @@ namespace lst { template void setData(TBuff& buf) { - hitRanges = alpaka::getPtrNative(buf.hitRanges_buf); - hitRangesLower = alpaka::getPtrNative(buf.hitRangesLower_buf); - hitRangesUpper = alpaka::getPtrNative(buf.hitRangesUpper_buf); - hitRangesnLower = alpaka::getPtrNative(buf.hitRangesnLower_buf); - hitRangesnUpper = alpaka::getPtrNative(buf.hitRangesnUpper_buf); - mdRanges = alpaka::getPtrNative(buf.mdRanges_buf); - segmentRanges = alpaka::getPtrNative(buf.segmentRanges_buf); - trackletRanges = alpaka::getPtrNative(buf.trackletRanges_buf); - tripletRanges = alpaka::getPtrNative(buf.tripletRanges_buf); - trackCandidateRanges = alpaka::getPtrNative(buf.trackCandidateRanges_buf); - quintupletRanges = alpaka::getPtrNative(buf.quintupletRanges_buf); - - nEligibleT5Modules = alpaka::getPtrNative(buf.nEligibleT5Modules_buf); - indicesOfEligibleT5Modules = alpaka::getPtrNative(buf.indicesOfEligibleT5Modules_buf); - - quintupletModuleIndices = alpaka::getPtrNative(buf.quintupletModuleIndices_buf); - quintupletModuleOccupancy = alpaka::getPtrNative(buf.quintupletModuleOccupancy_buf); - miniDoubletModuleIndices = alpaka::getPtrNative(buf.miniDoubletModuleIndices_buf); - miniDoubletModuleOccupancy = alpaka::getPtrNative(buf.miniDoubletModuleOccupancy_buf); - segmentModuleIndices = alpaka::getPtrNative(buf.segmentModuleIndices_buf); - segmentModuleOccupancy = alpaka::getPtrNative(buf.segmentModuleOccupancy_buf); - tripletModuleIndices = alpaka::getPtrNative(buf.tripletModuleIndices_buf); - tripletModuleOccupancy = alpaka::getPtrNative(buf.tripletModuleOccupancy_buf); - - device_nTotalMDs = alpaka::getPtrNative(buf.device_nTotalMDs_buf); - device_nTotalSegs = alpaka::getPtrNative(buf.device_nTotalSegs_buf); - device_nTotalTrips = alpaka::getPtrNative(buf.device_nTotalTrips_buf); - device_nTotalQuints = alpaka::getPtrNative(buf.device_nTotalQuints_buf); + 
hitRanges = buf.hitRanges_buf.data(); + hitRangesLower = buf.hitRangesLower_buf.data(); + hitRangesUpper = buf.hitRangesUpper_buf.data(); + hitRangesnLower = buf.hitRangesnLower_buf.data(); + hitRangesnUpper = buf.hitRangesnUpper_buf.data(); + mdRanges = buf.mdRanges_buf.data(); + segmentRanges = buf.segmentRanges_buf.data(); + trackletRanges = buf.trackletRanges_buf.data(); + tripletRanges = buf.tripletRanges_buf.data(); + trackCandidateRanges = buf.trackCandidateRanges_buf.data(); + quintupletRanges = buf.quintupletRanges_buf.data(); + + nEligibleT5Modules = buf.nEligibleT5Modules_buf.data(); + indicesOfEligibleT5Modules = buf.indicesOfEligibleT5Modules_buf.data(); + + quintupletModuleIndices = buf.quintupletModuleIndices_buf.data(); + quintupletModuleOccupancy = buf.quintupletModuleOccupancy_buf.data(); + miniDoubletModuleIndices = buf.miniDoubletModuleIndices_buf.data(); + miniDoubletModuleOccupancy = buf.miniDoubletModuleOccupancy_buf.data(); + segmentModuleIndices = buf.segmentModuleIndices_buf.data(); + segmentModuleOccupancy = buf.segmentModuleOccupancy_buf.data(); + tripletModuleIndices = buf.tripletModuleIndices_buf.data(); + tripletModuleOccupancy = buf.tripletModuleOccupancy_buf.data(); + + device_nTotalMDs = buf.device_nTotalMDs_buf.data(); + device_nTotalSegs = buf.device_nTotalSegs_buf.data(); + device_nTotalTrips = buf.device_nTotalTrips_buf.data(); + device_nTotalQuints = buf.device_nTotalQuints_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index 2c0b143a6d913..0a14f2cbbd112 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -34,24 +34,24 @@ namespace lst { template void setData(TBuff& buf) { - pixelIndices = alpaka::getPtrNative(buf.pixelIndices_buf); - T5Indices = alpaka::getPtrNative(buf.T5Indices_buf); - nPixelQuintuplets = alpaka::getPtrNative(buf.nPixelQuintuplets_buf); - 
totOccupancyPixelQuintuplets = alpaka::getPtrNative(buf.totOccupancyPixelQuintuplets_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - score = alpaka::getPtrNative(buf.score_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - pixelRadius = alpaka::getPtrNative(buf.pixelRadius_buf); - quintupletRadius = alpaka::getPtrNative(buf.quintupletRadius_buf); - centerX = alpaka::getPtrNative(buf.centerX_buf); - centerY = alpaka::getPtrNative(buf.centerY_buf); - rzChiSquared = alpaka::getPtrNative(buf.rzChiSquared_buf); - rPhiChiSquared = alpaka::getPtrNative(buf.rPhiChiSquared_buf); - rPhiChiSquaredInwards = alpaka::getPtrNative(buf.rPhiChiSquaredInwards_buf); + pixelIndices = buf.pixelIndices_buf.data(); + T5Indices = buf.T5Indices_buf.data(); + nPixelQuintuplets = buf.nPixelQuintuplets_buf.data(); + totOccupancyPixelQuintuplets = buf.totOccupancyPixelQuintuplets_buf.data(); + isDup = buf.isDup_buf.data(); + score = buf.score_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + pixelRadius = buf.pixelRadius_buf.data(); + quintupletRadius = buf.quintupletRadius_buf.data(); + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); + rPhiChiSquared = buf.rPhiChiSquared_buf.data(); + rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 15e4456c21fc6..aa37b91ebb9da 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -42,28 +42,28 @@ 
namespace lst { template void setData(TBuff& buf) { - pixelSegmentIndices = alpaka::getPtrNative(buf.pixelSegmentIndices_buf); - tripletIndices = alpaka::getPtrNative(buf.tripletIndices_buf); - nPixelTriplets = alpaka::getPtrNative(buf.nPixelTriplets_buf); - totOccupancyPixelTriplets = alpaka::getPtrNative(buf.totOccupancyPixelTriplets_buf); - pixelRadius = alpaka::getPtrNative(buf.pixelRadius_buf); - tripletRadius = alpaka::getPtrNative(buf.tripletRadius_buf); - pt = alpaka::getPtrNative(buf.pt_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - eta_pix = alpaka::getPtrNative(buf.eta_pix_buf); - phi_pix = alpaka::getPtrNative(buf.phi_pix_buf); - score = alpaka::getPtrNative(buf.score_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - centerX = alpaka::getPtrNative(buf.centerX_buf); - centerY = alpaka::getPtrNative(buf.centerY_buf); - rPhiChiSquared = alpaka::getPtrNative(buf.rPhiChiSquared_buf); - rPhiChiSquaredInwards = alpaka::getPtrNative(buf.rPhiChiSquaredInwards_buf); - rzChiSquared = alpaka::getPtrNative(buf.rzChiSquared_buf); + pixelSegmentIndices = buf.pixelSegmentIndices_buf.data(); + tripletIndices = buf.tripletIndices_buf.data(); + nPixelTriplets = buf.nPixelTriplets_buf.data(); + totOccupancyPixelTriplets = buf.totOccupancyPixelTriplets_buf.data(); + pixelRadius = buf.pixelRadius_buf.data(); + tripletRadius = buf.tripletRadius_buf.data(); + pt = buf.pt_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + eta_pix = buf.eta_pix_buf.data(); + phi_pix = buf.phi_pix_buf.data(); + score = buf.score_buf.data(); + isDup = buf.isDup_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = 
buf.hitIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + rPhiChiSquared = buf.rPhiChiSquared_buf.data(); + rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 3b700dbb94793..49eb3b1902c9a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -46,30 +46,30 @@ namespace lst { template void setData(TBuff& buf) { - tripletIndices = alpaka::getPtrNative(buf.tripletIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - nQuintuplets = alpaka::getPtrNative(buf.nQuintuplets_buf); - totOccupancyQuintuplets = alpaka::getPtrNative(buf.totOccupancyQuintuplets_buf); - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - innerRadius = alpaka::getPtrNative(buf.innerRadius_buf); - bridgeRadius = alpaka::getPtrNative(buf.bridgeRadius_buf); - outerRadius = alpaka::getPtrNative(buf.outerRadius_buf); - pt = alpaka::getPtrNative(buf.pt_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - score_rphisum = alpaka::getPtrNative(buf.score_rphisum_buf); - layer = alpaka::getPtrNative(buf.layer_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - TightCutFlag = alpaka::getPtrNative(buf.TightCutFlag_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - regressionRadius = alpaka::getPtrNative(buf.regressionRadius_buf); - regressionG = alpaka::getPtrNative(buf.regressionG_buf); - regressionF = alpaka::getPtrNative(buf.regressionF_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - rzChiSquared = alpaka::getPtrNative(buf.rzChiSquared_buf); - chiSquared = alpaka::getPtrNative(buf.chiSquared_buf); - 
nonAnchorChiSquared = alpaka::getPtrNative(buf.nonAnchorChiSquared_buf); + tripletIndices = buf.tripletIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + nQuintuplets = buf.nQuintuplets_buf.data(); + totOccupancyQuintuplets = buf.totOccupancyQuintuplets_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + innerRadius = buf.innerRadius_buf.data(); + bridgeRadius = buf.bridgeRadius_buf.data(); + outerRadius = buf.outerRadius_buf.data(); + pt = buf.pt_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + score_rphisum = buf.score_rphisum_buf.data(); + layer = buf.layer_buf.data(); + isDup = buf.isDup_buf.data(); + TightCutFlag = buf.TightCutFlag_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + regressionRadius = buf.regressionRadius_buf.data(); + regressionG = buf.regressionG_buf.data(); + regressionF = buf.regressionF_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); + chiSquared = buf.chiSquared_buf.data(); + nonAnchorChiSquared = buf.nonAnchorChiSquared_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 76436778802b1..cee59e316064a 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -50,40 +50,40 @@ namespace lst { template void setData(TBuff& buf) { - dPhis = alpaka::getPtrNative(buf.dPhis_buf); - dPhiMins = alpaka::getPtrNative(buf.dPhiMins_buf); - dPhiMaxs = alpaka::getPtrNative(buf.dPhiMaxs_buf); - dPhiChanges = alpaka::getPtrNative(buf.dPhiChanges_buf); - dPhiChangeMins = alpaka::getPtrNative(buf.dPhiChangeMins_buf); - dPhiChangeMaxs = alpaka::getPtrNative(buf.dPhiChangeMaxs_buf); - innerLowerModuleIndices = alpaka::getPtrNative(buf.innerLowerModuleIndices_buf); - outerLowerModuleIndices = alpaka::getPtrNative(buf.outerLowerModuleIndices_buf); - seedIdx = 
alpaka::getPtrNative(buf.seedIdx_buf); - mdIndices = alpaka::getPtrNative(buf.mdIndices_buf); - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - innerMiniDoubletAnchorHitIndices = alpaka::getPtrNative(buf.innerMiniDoubletAnchorHitIndices_buf); - outerMiniDoubletAnchorHitIndices = alpaka::getPtrNative(buf.outerMiniDoubletAnchorHitIndices_buf); - charge = alpaka::getPtrNative(buf.charge_buf); - superbin = alpaka::getPtrNative(buf.superbin_buf); - nSegments = alpaka::getPtrNative(buf.nSegments_buf); - totOccupancySegments = alpaka::getPtrNative(buf.totOccupancySegments_buf); - pLSHitsIdxs = alpaka::getPtrNative(buf.pLSHitsIdxs_buf); - pixelType = alpaka::getPtrNative(buf.pixelType_buf); - isQuad = alpaka::getPtrNative(buf.isQuad_buf); - isDup = alpaka::getPtrNative(buf.isDup_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - ptIn = alpaka::getPtrNative(buf.ptIn_buf); - ptErr = alpaka::getPtrNative(buf.ptErr_buf); - px = alpaka::getPtrNative(buf.px_buf); - py = alpaka::getPtrNative(buf.py_buf); - pz = alpaka::getPtrNative(buf.pz_buf); - etaErr = alpaka::getPtrNative(buf.etaErr_buf); - eta = alpaka::getPtrNative(buf.eta_buf); - phi = alpaka::getPtrNative(buf.phi_buf); - score = alpaka::getPtrNative(buf.score_buf); - circleCenterX = alpaka::getPtrNative(buf.circleCenterX_buf); - circleCenterY = alpaka::getPtrNative(buf.circleCenterY_buf); - circleRadius = alpaka::getPtrNative(buf.circleRadius_buf); + dPhis = buf.dPhis_buf.data(); + dPhiMins = buf.dPhiMins_buf.data(); + dPhiMaxs = buf.dPhiMaxs_buf.data(); + dPhiChanges = buf.dPhiChanges_buf.data(); + dPhiChangeMins = buf.dPhiChangeMins_buf.data(); + dPhiChangeMaxs = buf.dPhiChangeMaxs_buf.data(); + innerLowerModuleIndices = buf.innerLowerModuleIndices_buf.data(); + outerLowerModuleIndices = buf.outerLowerModuleIndices_buf.data(); + seedIdx = buf.seedIdx_buf.data(); + mdIndices = buf.mdIndices_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + 
innerMiniDoubletAnchorHitIndices = buf.innerMiniDoubletAnchorHitIndices_buf.data(); + outerMiniDoubletAnchorHitIndices = buf.outerMiniDoubletAnchorHitIndices_buf.data(); + charge = buf.charge_buf.data(); + superbin = buf.superbin_buf.data(); + nSegments = buf.nSegments_buf.data(); + totOccupancySegments = buf.totOccupancySegments_buf.data(); + pLSHitsIdxs = buf.pLSHitsIdxs_buf.data(); + pixelType = buf.pixelType_buf.data(); + isQuad = buf.isQuad_buf.data(); + isDup = buf.isDup_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + ptIn = buf.ptIn_buf.data(); + ptErr = buf.ptErr_buf.data(); + px = buf.px_buf.data(); + py = buf.py_buf.data(); + pz = buf.pz_buf.data(); + etaErr = buf.etaErr_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + score = buf.score_buf.data(); + circleCenterX = buf.circleCenterX_buf.data(); + circleCenterY = buf.circleCenterY_buf.data(); + circleRadius = buf.circleRadius_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 835647c65e4bd..03e853cea7d7b 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -34,23 +34,23 @@ namespace lst { template void setData(TBuff& buf) { - trackCandidateType = alpaka::getPtrNative(buf.trackCandidateType_buf); - directObjectIndices = alpaka::getPtrNative(buf.directObjectIndices_buf); - objectIndices = alpaka::getPtrNative(buf.objectIndices_buf); - nTrackCandidates = alpaka::getPtrNative(buf.nTrackCandidates_buf); - nTrackCandidatespT3 = alpaka::getPtrNative(buf.nTrackCandidatespT3_buf); - nTrackCandidatespT5 = alpaka::getPtrNative(buf.nTrackCandidatespT5_buf); - nTrackCandidatespLS = alpaka::getPtrNative(buf.nTrackCandidatespLS_buf); - nTrackCandidatesT5 = alpaka::getPtrNative(buf.nTrackCandidatesT5_buf); - - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - pixelSeedIndex = 
alpaka::getPtrNative(buf.pixelSeedIndex_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - - centerX = alpaka::getPtrNative(buf.centerX_buf); - centerY = alpaka::getPtrNative(buf.centerY_buf); - radius = alpaka::getPtrNative(buf.radius_buf); + trackCandidateType = buf.trackCandidateType_buf.data(); + directObjectIndices = buf.directObjectIndices_buf.data(); + objectIndices = buf.objectIndices_buf.data(); + nTrackCandidates = buf.nTrackCandidates_buf.data(); + nTrackCandidatespT3 = buf.nTrackCandidatespT3_buf.data(); + nTrackCandidatespT5 = buf.nTrackCandidatespT5_buf.data(); + nTrackCandidatespLS = buf.nTrackCandidatespLS_buf.data(); + nTrackCandidatesT5 = buf.nTrackCandidatesT5_buf.data(); + + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + pixelSeedIndex = buf.pixelSeedIndex_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + radius = buf.radius_buf.data(); } }; diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 9f3521e712ed6..3744dfb69e262 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -36,24 +36,24 @@ namespace lst { #endif template void setData(TBuff& buf) { - segmentIndices = alpaka::getPtrNative(buf.segmentIndices_buf); - lowerModuleIndices = alpaka::getPtrNative(buf.lowerModuleIndices_buf); - nTriplets = alpaka::getPtrNative(buf.nTriplets_buf); - totOccupancyTriplets = alpaka::getPtrNative(buf.totOccupancyTriplets_buf); - nMemoryLocations = alpaka::getPtrNative(buf.nMemoryLocations_buf); - logicalLayers = alpaka::getPtrNative(buf.logicalLayers_buf); - hitIndices = alpaka::getPtrNative(buf.hitIndices_buf); - betaIn = alpaka::getPtrNative(buf.betaIn_buf); - circleRadius = alpaka::getPtrNative(buf.circleRadius_buf); - circleCenterX = alpaka::getPtrNative(buf.circleCenterX_buf); - 
circleCenterY = alpaka::getPtrNative(buf.circleCenterY_buf); - partOfPT5 = alpaka::getPtrNative(buf.partOfPT5_buf); - partOfT5 = alpaka::getPtrNative(buf.partOfT5_buf); - partOfPT3 = alpaka::getPtrNative(buf.partOfPT3_buf); + segmentIndices = buf.segmentIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + nTriplets = buf.nTriplets_buf.data(); + totOccupancyTriplets = buf.totOccupancyTriplets_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + betaIn = buf.betaIn_buf.data(); + circleRadius = buf.circleRadius_buf.data(); + circleCenterX = buf.circleCenterX_buf.data(); + circleCenterY = buf.circleCenterY_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + partOfT5 = buf.partOfT5_buf.data(); + partOfPT3 = buf.partOfPT3_buf.data(); #ifdef CUT_VALUE_DEBUG - zOut = alpaka::getPtrNative(buf.zOut_buf); - rtOut = alpaka::getPtrNative(buf.rtOut_buf); - betaInCut = alpaka::getPtrNative(buf.betaInCut_buf); + zOut = buf.zOut_buf.data(); + rtOut = buf.rtOut_buf.data(); + betaInCut = buf.betaInCut_buf.data(); #endif } }; From c6a246874122bc22055440db295f3bc370a5f053 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 8 Aug 2024 15:35:06 -0700 Subject: [PATCH 09/20] lst::createWorkDiv now depends on Acc to avoid ODR; allocBufWrapper correct parameter is TDev --- RecoTracker/LSTCore/interface/Constants.h | 16 +++++------ .../LSTCore/interface/alpaka/Constants.h | 27 +++++++++---------- RecoTracker/LSTCore/src/ModuleMethods.h | 3 ++- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/RecoTracker/LSTCore/interface/Constants.h b/RecoTracker/LSTCore/interface/Constants.h index 725cf5f46b224..c0c342b6ad8a0 100644 --- a/RecoTracker/LSTCore/interface/Constants.h +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -14,23 +14,21 @@ namespace lst { using Buf = alpaka::Buf; // Allocation wrapper function to make integration of the caching 
allocator easier and reduce code boilerplate. - template - ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf, T> allocBufWrapper(TAcc const& devAccIn, - TSize nElements, - TQueue queue) { + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TDev const& dev, TSize nElements, TQueue queue) { #ifdef CACHE_ALLOC return cms::alpakatools::allocCachedBuf( - devAccIn, queue, alpaka_common::Vec1D(static_cast(nElements))); + dev, queue, alpaka_common::Vec1D(static_cast(nElements))); #else - return alpaka::allocBuf(devAccIn, + return alpaka::allocBuf(dev, alpaka_common::Vec1D(static_cast(nElements))); #endif } // Second allocation wrapper function when queue is not given. Reduces code boilerplate. - template - ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf, T> allocBufWrapper(TAcc const& devAccIn, TSize nElements) { - return alpaka::allocBuf(devAccIn, + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TDev const& dev, TSize nElements) { + return alpaka::allocBuf(dev, alpaka_common::Vec1D(static_cast(nElements))); } diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index e2ebd979a59a3..029d5ebcb80b7 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -36,25 +36,24 @@ namespace lst { #endif // Adjust grid and block sizes based on backend configuration - template - ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv3D createWorkDiv(const Vec& blocksPerGrid, - const Vec& threadsPerBlock, - const Vec& elementsPerThreadArg) { + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { Vec adjustedBlocks = blocksPerGrid; Vec adjustedThreads = threadsPerBlock; - // Serial execution, so all launch parameters set to 1. 
-#if defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) - adjustedBlocks = Vec::all(static_cast(1)); - adjustedThreads = Vec::all(static_cast(1)); -#endif + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { + adjustedBlocks = Vec::all(static_cast(1)); - // Threads enabled, set number of blocks to 1. -#if defined(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED) - adjustedBlocks = Vec::all(static_cast(1)); -#endif + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } + } - return WorkDiv3D(adjustedBlocks, adjustedThreads, elementsPerThreadArg); + return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); } // The constants below are usually used in functions like alpaka::math::min(), diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index 196212defdfa6..bf51e262f69e5 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -12,6 +12,7 @@ #include "RecoTracker/LSTCore/interface/PixelMap.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" namespace lst { struct ModuleMetaData { @@ -80,7 +81,7 @@ namespace lst { nPixels = connectedPix_size; // Now we re-initialize connectedPixels_buf since nPixels is now known - modulesBuf.connectedPixels_buf = allocBufWrapper(cms::alpakatools::host(), nPixels); + modulesBuf.connectedPixels_buf = cms::alpakatools::make_host_buffer(nPixels); modulesBuf.data_.setData(modulesBuf); unsigned int* connectedPixels = modulesBuf.connectedPixels_buf.data(); From 80ffdc3cdc26110a999dd883d3596b7eba55677b Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Mon, 12 Aug 2024 11:51:40 -0700 Subject: [PATCH 10/20] Changed syntax of atomic operations --- RecoTracker/LSTCore/src/alpaka/Hit.h | 10 ++++---- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 7 +++--- 
.../LSTCore/src/alpaka/PixelQuintuplet.h | 6 ++--- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 6 ++--- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 12 +++++----- RecoTracker/LSTCore/src/alpaka/Segment.h | 10 ++++---- .../LSTCore/src/alpaka/TrackCandidate.h | 24 +++++++++---------- RecoTracker/LSTCore/src/alpaka/Triplet.h | 13 ++++++---- 8 files changed, 47 insertions(+), 41 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index c14ac26124e6d..da2aa4c7ce3ad 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -244,13 +244,15 @@ namespace lst { hitsInGPU.lowEdgeYs[ihit] = ihit_y - 2.5f * sin_phi; } // Need to set initial value if index hasn't been seen before. - int old = alpaka::atomicOp( - acc, &(hitsInGPU.hitRanges[lastModuleIndex * 2]), -1, static_cast(ihit)); + int old = alpaka::atomicCas( + acc, &(hitsInGPU.hitRanges[lastModuleIndex * 2]), -1, static_cast(ihit), alpaka::hierarchy::Threads{}); // For subsequent visits, stores the min value. 
if (old != -1) - alpaka::atomicOp(acc, &hitsInGPU.hitRanges[lastModuleIndex * 2], static_cast(ihit)); + alpaka::atomicMin( + acc, &hitsInGPU.hitRanges[lastModuleIndex * 2], static_cast(ihit), alpaka::hierarchy::Threads{}); - alpaka::atomicOp(acc, &hitsInGPU.hitRanges[lastModuleIndex * 2 + 1], static_cast(ihit)); + alpaka::atomicMax( + acc, &hitsInGPU.hitRanges[lastModuleIndex * 2 + 1], static_cast(ihit), alpaka::hierarchy::Threads{}); } } }; diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 86a22d943c33f..e51e5bfdf8d1a 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -932,13 +932,14 @@ namespace lst { rtUpper); if (success) { int totOccupancyMDs = - alpaka::atomicOp(acc, &mdsInGPU.totOccupancyMDs[lowerModuleIndex], 1u); + alpaka::atomicAdd(acc, &mdsInGPU.totOccupancyMDs[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); if (totOccupancyMDs >= (rangesInGPU.miniDoubletModuleOccupancy[lowerModuleIndex])) { #ifdef WARNINGS printf("Mini-doublet excess alert! 
Module index = %d\n", lowerModuleIndex); #endif } else { - int mdModuleIndex = alpaka::atomicOp(acc, &mdsInGPU.nMDs[lowerModuleIndex], 1u); + int mdModuleIndex = + alpaka::atomicAdd(acc, &mdsInGPU.nMDs[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); unsigned int mdIndex = rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex] + mdModuleIndex; addMDToMemory(acc, @@ -1041,7 +1042,7 @@ namespace lst { #endif } - unsigned int nTotMDs = alpaka::atomicOp(acc, &nTotalMDs, occupancy); + unsigned int nTotMDs = alpaka::atomicAdd(acc, &nTotalMDs, occupancy, alpaka::hierarchy::Threads{}); rangesInGPU.miniDoubletModuleIndices[i] = nTotMDs; rangesInGPU.miniDoubletModuleOccupancy[i] = occupancy; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index ee172f9e05f6e..44a58cf612621 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -901,15 +901,15 @@ namespace lst { centerY, static_cast(i_pLS)); if (success) { - unsigned int totOccupancyPixelQuintuplets = - alpaka::atomicOp(acc, pixelQuintupletsInGPU.totOccupancyPixelQuintuplets, 1u); + unsigned int totOccupancyPixelQuintuplets = alpaka::atomicAdd( + acc, pixelQuintupletsInGPU.totOccupancyPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); if (totOccupancyPixelQuintuplets >= n_max_pixel_quintuplets) { #ifdef WARNINGS printf("Pixel Quintuplet excess alert!\n"); #endif } else { unsigned int pixelQuintupletIndex = - alpaka::atomicOp(acc, pixelQuintupletsInGPU.nPixelQuintuplets, 1u); + alpaka::atomicAdd(acc, pixelQuintupletsInGPU.nPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); float eta = __H2F(quintupletsInGPU.eta[quintupletIndex]); float phi = __H2F(quintupletsInGPU.phi[quintupletIndex]); diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 3b6faffbce426..0897517fd6a9d 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ 
b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -1025,15 +1025,15 @@ namespace lst { float phi_pix = segmentsInGPU.phi[i_pLS]; float pt = segmentsInGPU.ptIn[i_pLS]; float score = rPhiChiSquared + rPhiChiSquaredInwards; - unsigned int totOccupancyPixelTriplets = - alpaka::atomicOp(acc, pixelTripletsInGPU.totOccupancyPixelTriplets, 1u); + unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd( + acc, pixelTripletsInGPU.totOccupancyPixelTriplets, 1u, alpaka::hierarchy::Threads{}); if (totOccupancyPixelTriplets >= n_max_pixel_triplets) { #ifdef WARNINGS printf("Pixel Triplet excess alert!\n"); #endif } else { unsigned int pixelTripletIndex = - alpaka::atomicOp(acc, pixelTripletsInGPU.nPixelTriplets, 1u); + alpaka::atomicAdd(acc, pixelTripletsInGPU.nPixelTriplets, 1u, alpaka::hierarchy::Threads{}); addPixelTripletToMemory(mdsInGPU, segmentsInGPU, tripletsInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 1165d33f6da5e..8ed3786f610a6 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -2602,15 +2602,15 @@ namespace lst { TightCutFlag); if (success) { - int totOccupancyQuintuplets = - alpaka::atomicOp(acc, &quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1u); + int totOccupancyQuintuplets = alpaka::atomicAdd( + acc, &quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); if (totOccupancyQuintuplets >= rangesInGPU.quintupletModuleOccupancy[lowerModule1]) { #ifdef WARNINGS printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); #endif } else { - int quintupletModuleIndex = - alpaka::atomicOp(acc, &quintupletsInGPU.nQuintuplets[lowerModule1], 1u); + int quintupletModuleIndex = alpaka::atomicAdd( + acc, &quintupletsInGPU.nQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); //this if statement should never get executed! 
if (rangesInGPU.quintupletModuleIndices[lowerModule1] == -1) { #ifdef WARNINGS @@ -2700,7 +2700,7 @@ namespace lst { if (module_subdets == lst::Endcap and module_layers > 1) continue; - int nEligibleT5Modules = alpaka::atomicOp(acc, &nEligibleT5Modulesx, 1); + int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{}); if (module_layers <= 3 && module_subdets == 5) category_number = 0; @@ -2749,7 +2749,7 @@ namespace lst { #endif } - int nTotQ = alpaka::atomicOp(acc, &nTotalQuintupletsx, occupancy); + int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, occupancy, alpaka::hierarchy::Threads{}); rangesInGPU.quintupletModuleIndices[i] = nTotQ; rangesInGPU.indicesOfEligibleT5Modules[nEligibleT5Modules] = i; rangesInGPU.quintupletModuleOccupancy[i] = occupancy; diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6e79bacfa4902..8256926a3f75c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -763,15 +763,15 @@ namespace lst { dPhiChange, dPhiChangeMin, dPhiChangeMax)) { - unsigned int totOccupancySegments = alpaka::atomicOp( - acc, &segmentsInGPU.totOccupancySegments[innerLowerModuleIndex], 1u); + unsigned int totOccupancySegments = alpaka::atomicAdd( + acc, &segmentsInGPU.totOccupancySegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); if (static_cast(totOccupancySegments) >= rangesInGPU.segmentModuleOccupancy[innerLowerModuleIndex]) { #ifdef WARNINGS printf("Segment excess alert! 
Module index = %d\n", innerLowerModuleIndex); #endif } else { - unsigned int segmentModuleIdx = - alpaka::atomicOp(acc, &segmentsInGPU.nSegments[innerLowerModuleIndex], 1u); + unsigned int segmentModuleIdx = alpaka::atomicAdd( + acc, &segmentsInGPU.nSegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); unsigned int segmentIdx = rangesInGPU.segmentModuleIndices[innerLowerModuleIndex] + segmentModuleIdx; addSegmentToMemory(segmentsInGPU, @@ -882,7 +882,7 @@ namespace lst { #endif } - int nTotSegs = alpaka::atomicOp(acc, &nTotalSegments, occupancy); + int nTotSegs = alpaka::atomicAdd(acc, &nTotalSegments, occupancy, alpaka::hierarchy::Threads{}); rangesInGPU.segmentModuleIndices[i] = nTotSegs; rangesInGPU.segmentModuleOccupancy[i] = occupancy; } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index ede4dd9471e8e..704cdb21c1f5c 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -401,17 +401,17 @@ namespace lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // This is done before any non-pixel TCs are added { #ifdef WARNINGS printf("Track Candidate excess alert! 
Type = pT3"); #endif - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidatespT3, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT3, 1u, alpaka::hierarchy::Threads{}); float radius = 0.5f * (__H2F(pixelTripletsInGPU.pixelRadius[pixelTripletIndex]) + __H2F(pixelTripletsInGPU.tripletRadius[pixelTripletIndex])); @@ -457,7 +457,7 @@ namespace lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatespT5 - *trackCandidatesInGPU.nTrackCandidatespT3 >= n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs @@ -465,10 +465,10 @@ namespace lst { #ifdef WARNINGS printf("Track Candidate excess alert! Type = T5"); #endif - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidatesT5, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatesT5, 1u, alpaka::hierarchy::Threads{}); addTrackCandidateToMemory(trackCandidatesInGPU, 4 /*track candidate type T5=4*/, quintupletIndex, @@ -505,18 +505,18 @@ namespace lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatesT5 >= n_max_pixel_track_candidates) // T5 TCs have already been added { #ifdef WARNINGS printf("Track Candidate excess alert! 
Type = pLS"); #endif - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidatespLS, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespLS, 1u, alpaka::hierarchy::Threads{}); addpLSTrackCandidateToMemory(trackCandidatesInGPU, pixelArrayIndex, trackCandidateIdx, @@ -546,17 +546,17 @@ namespace lst { continue; unsigned int trackCandidateIdx = - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet { #ifdef WARNINGS printf("Track Candidate excess alert! Type = pT5"); #endif - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidates, 1u); + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); break; } else { - alpaka::atomicOp(acc, trackCandidatesInGPU.nTrackCandidatespT5, 1u); + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT5, 1u, alpaka::hierarchy::Threads{}); float radius = 0.5f * (__H2F(pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex]) + __H2F(pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex])); diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index f5a216724c1da..4aa219ba0821c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -868,16 +868,19 @@ namespace lst { circleCenterY); if (success) { - unsigned int totOccupancyTriplets = alpaka::atomicOp( - acc, &tripletsInGPU.totOccupancyTriplets[innerInnerLowerModuleIndex], 1u); + unsigned int totOccupancyTriplets = + alpaka::atomicAdd(acc, + &tripletsInGPU.totOccupancyTriplets[innerInnerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); 
if (static_cast(totOccupancyTriplets) >= rangesInGPU.tripletModuleOccupancy[innerInnerLowerModuleIndex]) { #ifdef WARNINGS printf("Triplet excess alert! Module index = %d\n", innerInnerLowerModuleIndex); #endif } else { - unsigned int tripletModuleIndex = - alpaka::atomicOp(acc, &tripletsInGPU.nTriplets[innerInnerLowerModuleIndex], 1u); + unsigned int tripletModuleIndex = alpaka::atomicAdd( + acc, &tripletsInGPU.nTriplets[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); unsigned int tripletIndex = rangesInGPU.tripletModuleIndices[innerInnerLowerModuleIndex] + tripletModuleIndex; #ifdef CUT_VALUE_DEBUG @@ -1009,7 +1012,7 @@ namespace lst { } rangesInGPU.tripletModuleOccupancy[i] = occupancy; - unsigned int nTotT = alpaka::atomicOp(acc, &nTotalTriplets, occupancy); + unsigned int nTotT = alpaka::atomicAdd(acc, &nTotalTriplets, occupancy, alpaka::hierarchy::Threads{}); rangesInGPU.tripletModuleIndices[i] = nTotT; } From a0432cce254d354bd744b8ddea3676cab87e3b4b Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Mon, 12 Aug 2024 12:09:26 -0700 Subject: [PATCH 11/20] Reduce number of moduleConnections_ lookups --- RecoTracker/LSTCore/src/ModuleConnectionMap.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc index 732b8e155fb4e..fe0826bbd80e6 100644 --- a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc +++ b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc @@ -69,15 +69,17 @@ void lst::ModuleConnectionMap::add(std::string const& filename) { connected_detids.push_back(connected_detid); } + auto& thisModuleConnections = moduleConnections_.at(detid); + // Concatenate - moduleConnections_[detid].insert(moduleConnections_[detid].end(), connected_detids.begin(), connected_detids.end()); + thisModuleConnections.insert(thisModuleConnections.end(), connected_detids.begin(), connected_detids.end()); // Sort - 
std::sort(moduleConnections_[detid].begin(), moduleConnections_[detid].end()); + std::sort(thisModuleConnections.begin(), thisModuleConnections.end()); // Unique - moduleConnections_[detid].erase(std::unique(moduleConnections_[detid].begin(), moduleConnections_[detid].end()), - moduleConnections_[detid].end()); + thisModuleConnections.erase(std::unique(thisModuleConnections.begin(), thisModuleConnections.end()), + thisModuleConnections.end()); } } From 22192ec1932fc2cb8886152bbb4b2d94d6e1c0c3 Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Mon, 12 Aug 2024 12:19:53 -0700 Subject: [PATCH 12/20] Add include for fp16 on HIP --- RecoTracker/LSTCore/interface/alpaka/Constants.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index e2ebd979a59a3..2e199897ee92c 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -3,8 +3,10 @@ #include "RecoTracker/LSTCore/interface/Constants.h" -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED #include +#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include #endif namespace lst { From 43ce20eee979ffc8b41d38629e91605d7cce3c54 Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Mon, 12 Aug 2024 15:26:08 -0700 Subject: [PATCH 13/20] explicitly require 1D single block kernels to use Acc1D and have one block with asserts --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 77 ++++++------------- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 14 +++- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 16 +++- RecoTracker/LSTCore/src/alpaka/Segment.h | 16 +++- .../LSTCore/src/alpaka/TrackCandidate.h | 16 +++- RecoTracker/LSTCore/src/alpaka/Triplet.h | 16 +++- 6 files changed, 81 insertions(+), 74 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 
9e46c96a4488c..cc8872438dfe7 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -255,13 +255,10 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; - Vec3D const blocksPerGridCreateMD{1, 1, 1}; - WorkDiv3D const createMDArrayRangesGPU_workDiv = - createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - alpaka::exec( + alpaka::exec( queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); @@ -281,13 +278,10 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. 
// If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them - Vec3D const threadsPerBlockCreateSeg{1, 1, 1024}; - Vec3D const blocksPerGridCreateSeg{1, 1, 1}; - WorkDiv3D const createSegmentArrayRanges_workDiv = - createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; - alpaka::exec(queue, + alpaka::exec(queue, createSegmentArrayRanges_workDiv, createSegmentArrayRanges_kernel, *modulesBuffers_.data(), @@ -388,13 +382,10 @@ void lst::Event::createMiniDoublets() { alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - Vec3D const threadsPerBlockCreateMD{1, 1, 1024}; - Vec3D const blocksPerGridCreateMD{1, 1, 1}; - WorkDiv3D const createMDArrayRangesGPU_workDiv = - createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; - alpaka::exec( + alpaka::exec( queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); @@ -424,13 +415,10 @@ void lst::Event::createMiniDoublets() { *mdsInGPU, *rangesInGPU); - Vec3D const threadsPerBlockAddMD{1, 1, 1024}; - Vec3D const blocksPerGridAddMD{1, 1, 1}; - WorkDiv3D const addMiniDoubletRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddMD, threadsPerBlockAddMD, elementsPerThread); + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, 
addMiniDoubletRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -465,13 +453,10 @@ void lst::Event::createSegmentsWithModuleMap() { *segmentsInGPU, *rangesInGPU); - Vec3D const threadsPerBlockAddSeg{1, 1, 1024}; - Vec3D const blocksPerGridAddSeg{1, 1, 1}; - WorkDiv3D const addSegmentRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddSeg, threadsPerBlockAddSeg, elementsPerThread); + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, addSegmentRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -485,13 +470,10 @@ void lst::Event::createSegmentsWithModuleMap() { void lst::Event::createTriplets() { if (tripletsInGPU == nullptr) { - Vec3D const threadsPerBlockCreateTrip{1, 1, 1024}; - Vec3D const blocksPerGridCreateTrip{1, 1, 1}; - WorkDiv3D const createTripletArrayRanges_workDiv = - createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createTripletArrayRanges createTripletArrayRanges_kernel; - alpaka::exec(queue, + alpaka::exec(queue, createTripletArrayRanges_workDiv, createTripletArrayRanges_kernel, *modulesBuffers_.data(), @@ -563,13 +545,10 @@ void lst::Event::createTriplets() { index_gpu_buf.data(), nonZeroModules); - Vec3D const threadsPerBlockAddTrip{1, 1, 1024}; - Vec3D const blocksPerGridAddTrip{1, 1, 1}; - WorkDiv3D const addTripletRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddTrip, threadsPerBlockAddTrip, elementsPerThread); + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, 
addTripletRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -604,13 +583,10 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ *segmentsInGPU, *pixelQuintupletsInGPU); - Vec3D const threadsPerBlock_addpT3asTrackCandidatesInGPU{1, 1, 512}; - Vec3D const blocksPerGrid_addpT3asTrackCandidatesInGPU{1, 1, 1}; - WorkDiv3D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv( - blocksPerGrid_addpT3asTrackCandidatesInGPU, threadsPerBlock_addpT3asTrackCandidatesInGPU, elementsPerThread); + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, addpT3asTrackCandidatesInGPU_kernel, nLowerModules_, @@ -849,13 +825,10 @@ void lst::Event::createPixelTriplets() { } void lst::Event::createQuintuplets() { - Vec3D const threadsPerBlockCreateQuints{1, 1, 1024}; - Vec3D const blocksPerGridCreateQuints{1, 1, 1}; - WorkDiv3D const createEligibleModulesListForQuintupletsGPU_workDiv = - createWorkDiv(blocksPerGridCreateQuints, threadsPerBlockCreateQuints, elementsPerThread); + WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; - alpaka::exec(queue, + alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, createEligibleModulesListForQuintupletsGPU_kernel, *modulesBuffers_.data(), @@ -910,13 +883,10 @@ void lst::Event::createQuintuplets() { *quintupletsInGPU, *rangesInGPU); - Vec3D const threadsPerBlockAddQuint{1, 1, 1024}; - Vec3D const blocksPerGridAddQuint{1, 1, 1}; - WorkDiv3D const addQuintupletRangesToEventExplicit_workDiv = - createWorkDiv(blocksPerGridAddQuint, threadsPerBlockAddQuint, elementsPerThread); + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, 
{1}); lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, addQuintupletRangesToEventExplicit_kernel, *modulesBuffers_.data(), @@ -1044,13 +1014,10 @@ void lst::Event::createPixelQuintuplets() { removeDupPixelQuintupletsInGPUFromMap_kernel, *pixelQuintupletsInGPU); - Vec3D const threadsPerBlockAddpT5asTrackCan{1, 1, 256}; - Vec3D const blocksPerGridAddpT5asTrackCan{1, 1, 1}; - WorkDiv3D const addpT5asTrackCandidateInGPU_workDiv = - createWorkDiv(blocksPerGridAddpT5asTrackCan, threadsPerBlockAddpT5asTrackCan, elementsPerThread); + WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; - alpaka::exec(queue, + alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, addpT5asTrackCandidateInGPU_kernel, nLowerModules_, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index b4cbd500c7bf8..c00015384b77b 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -968,6 +968,10 @@ namespace lst { ALPAKA_FN_ACC void operator()(TAcc const& acc, struct lst::Modules modulesInGPU, struct lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -978,10 +982,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. 
int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { short module_rings = modulesInGPU.rings[i]; short module_layers = modulesInGPU.layers[i]; short module_subdets = modulesInGPU.subdets[i]; @@ -1062,10 +1066,14 @@ namespace lst { struct lst::MiniDoublets mdsInGPU, struct lst::ObjectRanges rangesInGPU, struct lst::Hits hitsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (mdsInGPU.nMDs[i] == 0 or hitsInGPU.hitRanges[i * 2] == -1) { rangesInGPU.mdRanges[i * 2] = -1; rangesInGPU.mdRanges[i * 2 + 1] = -1; diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 49eb3b1902c9a..07b5f50dd57de 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -2669,6 +2669,10 @@ namespace lst { lst::Modules modulesInGPU, lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -2681,10 +2685,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. 
int occupancy, category_number, eta_number; - for (int i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (int i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { // Condition for a quintuple to exist for a module // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap short module_rings = modulesInGPU.rings[i]; @@ -2756,7 +2760,7 @@ namespace lst { // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); - if (globalThreadIdx[2] == 0) { + if (cms::alpakatools::once_per_block(acc)) { *rangesInGPU.nEligibleT5Modules = static_cast(nEligibleT5Modulesx); *rangesInGPU.device_nTotalQuints = static_cast(nTotalQuintupletsx); } @@ -2769,10 +2773,14 @@ namespace lst { lst::Modules modulesInGPU, lst::Quintuplets quintupletsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (quintupletsInGPU.nQuintuplets[i] == 0 or rangesInGPU.quintupletModuleIndices[i] == -1) { rangesInGPU.quintupletRanges[i * 2] = -1; rangesInGPU.quintupletRanges[i * 2 + 1] = -1; diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index cee59e316064a..cc8470f911a8b 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -801,6 +801,10 @@ namespace lst { lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU, lst::MiniDoublets mdsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be 
Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -811,10 +815,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (modulesInGPU.nConnectedModules[i] == 0) { rangesInGPU.segmentModuleIndices[i] = nTotalSegments; rangesInGPU.segmentModuleOccupancy[i] = 0; @@ -888,7 +892,7 @@ namespace lst { // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); - if (globalThreadIdx[2] == 0) { + if (cms::alpakatools::once_per_block(acc)) { rangesInGPU.segmentModuleIndices[*modulesInGPU.nLowerModules] = nTotalSegments; *rangesInGPU.device_nTotalSegs = nTotalSegments; } @@ -901,10 +905,14 @@ namespace lst { lst::Modules modulesInGPU, lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (segmentsInGPU.nSegments[i] == 0) { rangesInGPU.segmentRanges[i * 2] = -1; rangesInGPU.segmentRanges[i * 2 + 1] = -1; diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 03e853cea7d7b..24ef4b94de0f2 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ 
b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -389,13 +389,17 @@ namespace lst { lst::TrackCandidates trackCandidatesInGPU, lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; - for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets; - pixelTripletIndex += gridThreadExtent[2]) { + for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[0]) { if ((pixelTripletsInGPU.isDup[pixelTripletIndex])) continue; @@ -534,13 +538,17 @@ namespace lst { lst::TrackCandidates trackCandidatesInGPU, lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; - for (int pixelQuintupletIndex = globalThreadIdx[2]; pixelQuintupletIndex < nPixelQuintuplets; - pixelQuintupletIndex += gridThreadExtent[2]) { + for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[0]) { if (pixelQuintupletsInGPU.isDup[pixelQuintupletIndex]) continue; diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 3744dfb69e262..9fab052e6531f 100644 --- 
a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -928,6 +928,10 @@ namespace lst { lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU, lst::Segments segmentsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -938,10 +942,10 @@ namespace lst { } alpaka::syncBlockThreads(acc); - // Initialize variables outside of the for loop. + // Create variables outside of the for loop. int occupancy, category_number, eta_number; - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (segmentsInGPU.nSegments[i] == 0) { rangesInGPU.tripletModuleIndices[i] = nTotalTriplets; rangesInGPU.tripletModuleOccupancy[i] = 0; @@ -1015,7 +1019,7 @@ namespace lst { // Wait for all threads to finish before reporting final values alpaka::syncBlockThreads(acc); - if (globalThreadIdx[2] == 0) { + if (cms::alpakatools::once_per_block(acc)) { *rangesInGPU.device_nTotalTrips = nTotalTriplets; } } @@ -1027,10 +1031,14 @@ namespace lst { lst::Modules modulesInGPU, lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); - for (uint16_t i = globalThreadIdx[2]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[2]) { + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { if (tripletsInGPU.nTriplets[i] == 0) { rangesInGPU.tripletRanges[i * 2] = -1; 
rangesInGPU.tripletRanges[i * 2 + 1] = -1; From 7889093ac6bd4e1f2e01ee5452fca01e776d6f0c Mon Sep 17 00:00:00 2001 From: Slava Krutelyov Date: Thu, 15 Aug 2024 16:43:14 -0700 Subject: [PATCH 14/20] add synchronizations in callers of the event methods where it matters; make synchronization more explicit/flexible in names or function arguments --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 55 ++++++++++---- RecoTracker/LSTCore/src/alpaka/Event.h | 72 ++++++++++--------- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 18 +++-- RecoTracker/LSTCore/standalone/bin/lst.cc | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 10 +++ 5 files changed, 102 insertions(+), 55 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index cc8872438dfe7..f9757b0659691 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -4,7 +4,8 @@ using namespace ALPAKA_ACCELERATOR_NAMESPACE; -void lst::Event::init(bool verbose) { +void lst::Event::initSync(bool verbose) { + alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; mdsInGPU = nullptr; @@ -46,7 +47,8 @@ void lst::Event::init(bool verbose) { } } -void lst::Event::resetEvent() { +void lst::Event::resetEventSync() { + alpaka::wait(queue); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { n_hits_by_layer_barrel_[i] = 0; @@ -1358,7 +1360,7 @@ int lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should take care of garbage collection +lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1376,11 +1378,13 @@ lst::HitsBuffer* lst::Event::getHits() //std::shared_ptr should alpaka::memcpy(queue, 
hitsInCPU->ys_buf, hitsBuffers->ys_buf, nHits); alpaka::memcpy(queue, hitsInCPU->zs_buf, hitsBuffers->zs_buf, nHits); alpaka::memcpy(queue, hitsInCPU->moduleIndices_buf, hitsBuffers->moduleIndices_buf, nHits); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return hitsInCPU; } -lst::HitsBuffer* lst::Event::getHitsInCMSSW() { +lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); @@ -1392,11 +1396,13 @@ lst::HitsBuffer* lst::Event::getHitsInCMSSW() { *hitsInCPU->nHits_buf.data() = nHits; alpaka::memcpy(queue, hitsInCPU->idxs_buf, hitsBuffers->idxs_buf, nHits); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return hitsInCPU; } -lst::ObjectRangesBuffer* lst::Event::getRanges() { +lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { rangesInCPU = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); @@ -1406,12 +1412,13 @@ lst::ObjectRangesBuffer* lst::Event::getRanges() { alpaka::memcpy(queue, rangesInCPU->miniDoubletModuleIndices_buf, rangesBuffers->miniDoubletModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->segmentModuleIndices_buf, rangesBuffers->segmentModuleIndices_buf); alpaka::memcpy(queue, rangesInCPU->tripletModuleIndices_buf, rangesBuffers->tripletModuleIndices_buf); - alpaka::wait(queue); // wait to get completed host data + if (sync) + alpaka::wait(queue); // wait to get completed host data } return rangesInCPU; } -lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets() { +lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1428,11 +1435,13 @@ lst::MiniDoubletsBuffer* 
lst::Event::getMiniDoublets() { alpaka::memcpy(queue, mdsInCPU->dphichanges_buf, miniDoubletsBuffers->dphichanges_buf, nMemHost); alpaka::memcpy(queue, mdsInCPU->nMDs_buf, miniDoubletsBuffers->nMDs_buf); alpaka::memcpy(queue, mdsInCPU->totOccupancyMDs_buf, miniDoubletsBuffers->totOccupancyMDs_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return mdsInCPU; } -lst::SegmentsBuffer* lst::Event::getSegments() { +lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1463,11 +1472,13 @@ lst::SegmentsBuffer* lst::Event::getSegments() { alpaka::memcpy(queue, segmentsInCPU->isDup_buf, segmentsBuffers->isDup_buf); alpaka::memcpy(queue, segmentsInCPU->isQuad_buf, segmentsBuffers->isQuad_buf); alpaka::memcpy(queue, segmentsInCPU->score_buf, segmentsBuffers->score_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return segmentsInCPU; } -lst::TripletsBuffer* lst::Event::getTriplets() { +lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1498,11 +1509,13 @@ lst::TripletsBuffer* lst::Event::getTriplets() { alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->nTriplets_buf, tripletsBuffers->nTriplets_buf); alpaka::memcpy(queue, tripletsInCPU->totOccupancyTriplets_buf, tripletsBuffers->totOccupancyTriplets_buf); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return tripletsInCPU; } -lst::QuintupletsBuffer* lst::Event::getQuintuplets() { +lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get 
nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1533,11 +1546,13 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets() { alpaka::memcpy(queue, quintupletsInCPU->rzChiSquared_buf, quintupletsBuffers->rzChiSquared_buf, nMemHost); alpaka::memcpy( queue, quintupletsInCPU->nonAnchorChiSquared_buf, quintupletsBuffers->nonAnchorChiSquared_buf, nMemHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return quintupletsInCPU; } -lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { +lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1571,11 +1586,13 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets() { alpaka::memcpy(queue, pixelTripletsInCPU->eta_buf, pixelTripletsBuffers->eta_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->phi_buf, pixelTripletsBuffers->phi_buf, nPixelTriplets); alpaka::memcpy(queue, pixelTripletsInCPU->score_buf, pixelTripletsBuffers->score_buf, nPixelTriplets); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { +lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1606,11 +1623,13 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets() { queue, pixelQuintupletsInCPU->T5Indices_buf, pixelQuintupletsBuffers->T5Indices_buf, nPixelQuintuplets); alpaka::memcpy(queue, pixelQuintupletsInCPU->isDup_buf, pixelQuintupletsBuffers->isDup_buf, nPixelQuintuplets); 
alpaka::memcpy(queue, pixelQuintupletsInCPU->score_buf, pixelQuintupletsBuffers->score_buf, nPixelQuintuplets); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { +lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1643,11 +1662,13 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates() { trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW() { +lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1670,16 +1691,20 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS trackCandidatesInCPU->trackCandidateType_buf, trackCandidatesBuffers->trackCandidateType_buf, nTrackCanHost); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return trackCandidatesInCPU; } -lst::ModulesBuffer* lst::Event::getModules(bool isFull) { +lst::ModulesBuffer* lst::Event::getModules(bool isFull, bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. 
modulesInCPU = new lst::ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); + if (sync) + alpaka::wait(queue); // host consumers expect filled data } return modulesInCPU; } diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 7e2a351a8b699..64365bb58bfa8 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -78,7 +78,7 @@ namespace lst { PixelTripletsBuffer* pixelTripletsInCPU; PixelQuintupletsBuffer* pixelQuintupletsInCPU; - void init(bool verbose); + void initSync(bool verbose); int* superbinCPU; int8_t* pixelTypeCPU; @@ -105,9 +105,10 @@ namespace lst { modulesBuffers_(deviceESData->modulesBuffers), pixelMapping_(*deviceESData->pixelMapping), endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { - init(verbose); + initSync(verbose); } - void resetEvent(); + void resetEventSync(); // synchronizes + void wait() const { alpaka::wait(queue); } // Calls the appropriate hit function, then increments the counter void addHitToEvent(std::vector const& x, @@ -134,24 +135,21 @@ namespace lst { std::vector const& pixelType, std::vector const& isQuad); - // functions that map the objects to the appropriate modules - void addMiniDoubletsToEventExplicit(); - void addSegmentsToEventExplicit(); - void addTripletsToEventExplicit(); - void addQuintupletsToEventExplicit(); - void resetObjectsInModule(); - void createMiniDoublets(); void createSegmentsWithModuleMap(); void createTriplets(); - void createPixelTracklets(); - void createPixelTrackletsWithMap(); void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); - void createExtendedTracks(); - void createQuintuplets(); void createPixelTriplets(); - void createPixelQuintuplets(); + void createQuintuplets(); void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void 
addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); unsigned int getNumberOfHits(); unsigned int getNumberOfHitsByLayer(unsigned int layer); @@ -173,33 +171,37 @@ namespace lst { unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); - int getNumberOfTrackCandidates(); - int getNumberOfPixelTrackCandidates(); - int getNumberOfPT5TrackCandidates(); - int getNumberOfPT3TrackCandidates(); - int getNumberOfT5TrackCandidates(); - int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); unsigned int getNumberOfQuintuplets(); unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); - int getNumberOfPixelTriplets(); - int getNumberOfPixelQuintuplets(); + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); - ObjectRangesBuffer* getRanges(); - HitsBuffer* getHits(); - HitsBuffer* getHitsInCMSSW(); - MiniDoubletsBuffer* getMiniDoublets(); - SegmentsBuffer* getSegments(); - TripletsBuffer* getTriplets(); - QuintupletsBuffer* getQuintuplets(); - TrackCandidatesBuffer* getTrackCandidates(); - TrackCandidatesBuffer* getTrackCandidatesInCMSSW(); - PixelTripletsBuffer* getPixelTriplets(); - PixelQuintupletsBuffer* getPixelQuintuplets(); - ModulesBuffer* getModules(bool isFull = false); + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + HitsBuffer* 
getHits(bool sync = true); + HitsBuffer* getHitsInCMSSW(bool sync = true); + ObjectRangesBuffer* getRanges(bool sync = true); + MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + SegmentsBuffer* getSegments(bool sync = true); + TripletsBuffer* getTriplets(bool sync = true); + QuintupletsBuffer* getQuintuplets(bool sync = true); + PixelTripletsBuffer* getPixelTriplets(bool sync = true); + PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ModulesBuffer* getModules(bool isFull = false, bool sync = true); }; } // namespace lst diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index 940469e8682a2..f5ee7d7f52add 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -255,10 +255,11 @@ void lst::LST::getOutput(lst::Event& event) { std::vector tc_seedIdx; std::vector tc_trackCandidateType; - lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW()); + lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { short trackCandidateType = trackCandidates->trackCandidateType[idx]; std::vector hit_idx = @@ -344,6 +345,7 @@ void lst::LST::run(Queue& queue, in_isQuad_vec_); event.createMiniDoublets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets()); printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0)); printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1)); @@ -360,6 +362,7 
@@ void lst::LST::run(Queue& queue, event.createSegmentsWithModuleMap(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Segments produced: %d\n", event.getNumberOfSegments()); printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0)); printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1)); @@ -375,6 +378,7 @@ void lst::LST::run(Queue& queue, event.createTriplets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of T3s produced: %d\n", event.getNumberOfTriplets()); printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0)); printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1)); @@ -392,6 +396,7 @@ void lst::LST::run(Queue& queue, event.createQuintuplets(); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets()); printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0)); printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1)); @@ -409,15 +414,20 @@ void lst::LST::run(Queue& queue, event.pixelLineSegmentCleaning(no_pls_dupclean); event.createPixelQuintuplets(); - if (verbose) + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets()); + } event.createPixelTriplets(); - if (verbose) + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets()); + } event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets); if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing 
printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates()); printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates()); printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates()); @@ -428,5 +438,5 @@ void lst::LST::run(Queue& queue, getOutput(event); - event.resetEvent(); + event.resetEventSync(); } diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index e67fe5b62d269..89bb43a3bcd4b 100644 --- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -478,7 +478,7 @@ void run_lst() { // Clear this event TStopwatch my_timer; my_timer.Start(); - events.at(omp_get_thread_num())->resetEvent(); + events.at(omp_get_thread_num())->resetEventSync(); float timing_resetEvent = my_timer.RealTime(); timing_information.push_back({timing_input_loading, diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index d6657c5e512f6..9277b60253a64 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -28,6 +28,7 @@ float runMiniDoublet(lst::Event *event, int evt) { std::cout << "Reco Mini-Doublet start " << evt << std::endl; my_timer.Start(); event->createMiniDoublets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float md_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) @@ -80,6 +81,7 @@ float runSegment(lst::Event *event) { std::cout << "Reco Segment start" << std::endl; my_timer.Start(); event->createSegmentsWithModuleMap(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float sg_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Segment processing time: " << sg_elapsed << " secs" << std::endl; @@ -117,6 +119,7 @@ float runT3(lst::Event *event) { std::cout << 
"Reco T3 start" << std::endl; my_timer.Start(); event->createTriplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float t3_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco T3 processing time: " << t3_elapsed << " secs" << std::endl; @@ -158,6 +161,7 @@ float runpT3(lst::Event *event) { std::cout << "Reco Pixel Triplet pT3 start" << std::endl; my_timer.Start(); event->createPixelTriplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pt3_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco pT3 processing time: " << pt3_elapsed << " secs" << std::endl; @@ -174,6 +178,7 @@ float runQuintuplet(lst::Event *event) { std::cout << "Reco Quintuplet start" << std::endl; my_timer.Start(); event->createQuintuplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float t5_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Quintuplet processing time: " << t5_elapsed << " secs" << std::endl; @@ -219,6 +224,7 @@ float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { std::cout << "Reco Pixel Line Segment start" << std::endl; my_timer.Start(); event->pixelLineSegmentCleaning(no_pls_dupclean); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pls_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment processing time: " << pls_elapsed << " secs" << std::endl; @@ -233,6 +239,7 @@ float runPixelQuintuplet(lst::Event *event) { std::cout << "Reco Pixel Quintuplet start" << std::endl; my_timer.Start(); event->createPixelQuintuplets(); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float pt5_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet processing time: " << pt5_elapsed << " secs" << std::endl; @@ 
-249,6 +256,7 @@ float runTrackCandidate(lst::Event *event, bool no_pls_dupclean, bool tc_ std::cout << "Reco TrackCandidate start" << std::endl; my_timer.Start(); event->createTrackCandidates(no_pls_dupclean, tc_pls_triplets); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float tc_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Reco TrackCandidate processing time: " << tc_elapsed << " secs" << std::endl; @@ -892,6 +900,7 @@ float addInputsToEventPreLoad(lst::Event *event, superbin_vec, pixelType_vec, isQuad_vec); + event->wait(); // device side event calls are asynchronous: wait to measure time or print float hit_loading_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) @@ -1331,6 +1340,7 @@ void writeMetaData() { pixelType_vec, isQuad_vec); + event.wait(); // device side event calls are asynchronous: wait to measure time or print float hit_loading_elapsed = my_timer.RealTime(); if (ana.verbose >= 2) std::cout << "Loading inputs processing time: " << hit_loading_elapsed << " secs" << std::endl; From 8d2366bea51a0c8778c1256c0d6a9f27ac2edd94 Mon Sep 17 00:00:00 2001 From: Manos Vourliotis Date: Fri, 16 Aug 2024 09:15:04 -0700 Subject: [PATCH 15/20] Remove pass by const reference when std::move-ing --- RecoTracker/LST/interface/LSTOutput.h | 17 +++--- .../LST/interface/LSTPhase2OTHitsInput.h | 17 ++---- RecoTracker/LST/interface/LSTPixelSeedInput.h | 61 +++++++++---------- 3 files changed, 44 insertions(+), 51 deletions(-) diff --git a/RecoTracker/LST/interface/LSTOutput.h b/RecoTracker/LST/interface/LSTOutput.h index a337f107e35ba..1a2b91fd0f375 100644 --- a/RecoTracker/LST/interface/LSTOutput.h +++ b/RecoTracker/LST/interface/LSTOutput.h @@ -7,15 +7,14 @@ class LSTOutput { public: LSTOutput() = default; - LSTOutput(std::vector> const& hitIdx, - std::vector const& len, - std::vector const& seedIdx, - std::vector const& trackCandidateType) { - hitIdx_ = std::move(hitIdx); - len_ = std::move(len); - 
seedIdx_ = std::move(seedIdx); - trackCandidateType_ = std::move(trackCandidateType); - } + LSTOutput(std::vector> const hitIdx, + std::vector const len, + std::vector const seedIdx, + std::vector const trackCandidateType) + : hitIdx_(std::move(hitIdx)), + len_(std::move(len)), + seedIdx_(std::move(seedIdx)), + trackCandidateType_(std::move(trackCandidateType)) {} ~LSTOutput() = default; diff --git a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h index 40b265db3edb7..0bb8e9f2ba05b 100644 --- a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h +++ b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h @@ -9,17 +9,12 @@ class LSTPhase2OTHitsInput { public: LSTPhase2OTHitsInput() = default; - LSTPhase2OTHitsInput(std::vector const& detId, - std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& hits) { - detId_ = std::move(detId); - x_ = std::move(x); - y_ = std::move(y); - z_ = std::move(z); - hits_ = std::move(hits); - } + LSTPhase2OTHitsInput(std::vector const detId, + std::vector const x, + std::vector const y, + std::vector const z, + std::vector const hits) + : detId_(std::move(detId)), x_(std::move(x)), y_(std::move(y)), z_(std::move(z)), hits_(std::move(hits)) {} ~LSTPhase2OTHitsInput() = default; diff --git a/RecoTracker/LST/interface/LSTPixelSeedInput.h b/RecoTracker/LST/interface/LSTPixelSeedInput.h index 2fb6a244a5648..efbc41d26a913 100644 --- a/RecoTracker/LST/interface/LSTPixelSeedInput.h +++ b/RecoTracker/LST/interface/LSTPixelSeedInput.h @@ -7,37 +7,36 @@ class LSTPixelSeedInput { public: LSTPixelSeedInput() = default; - LSTPixelSeedInput(std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& dxy, - std::vector const& dz, - std::vector const& ptErr, - std::vector const& etaErr, - std::vector const& stateTrajGlbX, - std::vector const& stateTrajGlbY, - std::vector const& stateTrajGlbZ, - std::vector const& stateTrajGlbPx, - 
std::vector const& stateTrajGlbPy, - std::vector const& stateTrajGlbPz, - std::vector const& q, - std::vector> const& hitIdx) { - px_ = std::move(px); - py_ = std::move(py); - pz_ = std::move(pz); - dxy_ = std::move(dxy); - dz_ = std::move(dz); - ptErr_ = std::move(ptErr); - etaErr_ = std::move(etaErr); - stateTrajGlbX_ = std::move(stateTrajGlbX); - stateTrajGlbY_ = std::move(stateTrajGlbY); - stateTrajGlbZ_ = std::move(stateTrajGlbZ); - stateTrajGlbPx_ = std::move(stateTrajGlbPx); - stateTrajGlbPy_ = std::move(stateTrajGlbPy); - stateTrajGlbPz_ = std::move(stateTrajGlbPz); - q_ = std::move(q); - hitIdx_ = std::move(hitIdx); - } + LSTPixelSeedInput(std::vector const px, + std::vector const py, + std::vector const pz, + std::vector const dxy, + std::vector const dz, + std::vector const ptErr, + std::vector const etaErr, + std::vector const stateTrajGlbX, + std::vector const stateTrajGlbY, + std::vector const stateTrajGlbZ, + std::vector const stateTrajGlbPx, + std::vector const stateTrajGlbPy, + std::vector const stateTrajGlbPz, + std::vector const q, + std::vector> const hitIdx) + : px_(std::move(px)), + py_(std::move(py)), + pz_(std::move(pz)), + dxy_(std::move(dxy)), + dz_(std::move(dz)), + ptErr_(std::move(ptErr)), + etaErr_(std::move(etaErr)), + stateTrajGlbX_(std::move(stateTrajGlbX)), + stateTrajGlbY_(std::move(stateTrajGlbY)), + stateTrajGlbZ_(std::move(stateTrajGlbZ)), + stateTrajGlbPx_(std::move(stateTrajGlbPx)), + stateTrajGlbPy_(std::move(stateTrajGlbPy)), + stateTrajGlbPz_(std::move(stateTrajGlbPz)), + q_(std::move(q)), + hitIdx_(std::move(hitIdx)) {} ~LSTPixelSeedInput() = default; From d5e443a15b4f267fbf18084abbc0fc044a2e79b1 Mon Sep 17 00:00:00 2001 From: Manos Vourliotis Date: Fri, 16 Aug 2024 09:33:38 -0700 Subject: [PATCH 16/20] Remove user-defined destructors --- RecoTracker/LST/interface/LSTOutput.h | 2 -- RecoTracker/LST/interface/LSTPhase2OTHitsInput.h | 2 -- RecoTracker/LST/interface/LSTPixelSeedInput.h | 2 -- 
RecoTracker/LSTCore/interface/EndcapGeometry.h | 1 - RecoTracker/LSTCore/interface/ModuleConnectionMap.h | 1 - RecoTracker/LSTCore/interface/TiltedGeometry.h | 1 - RecoTracker/LSTCore/src/ModuleConnectionMap.cc | 2 -- 7 files changed, 11 deletions(-) diff --git a/RecoTracker/LST/interface/LSTOutput.h b/RecoTracker/LST/interface/LSTOutput.h index 1a2b91fd0f375..5be4f645d3416 100644 --- a/RecoTracker/LST/interface/LSTOutput.h +++ b/RecoTracker/LST/interface/LSTOutput.h @@ -16,8 +16,6 @@ class LSTOutput { seedIdx_(std::move(seedIdx)), trackCandidateType_(std::move(trackCandidateType)) {} - ~LSTOutput() = default; - enum LSTTCType { T5 = 4, pT3 = 5, pT5 = 7, pLS = 8 }; // Hit indices of each of the LST track candidates. diff --git a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h index 0bb8e9f2ba05b..00fd77846c4c3 100644 --- a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h +++ b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h @@ -16,8 +16,6 @@ class LSTPhase2OTHitsInput { std::vector const hits) : detId_(std::move(detId)), x_(std::move(x)), y_(std::move(y)), z_(std::move(z)), hits_(std::move(hits)) {} - ~LSTPhase2OTHitsInput() = default; - std::vector const& detId() const { return detId_; } std::vector const& x() const { return x_; } std::vector const& y() const { return y_; } diff --git a/RecoTracker/LST/interface/LSTPixelSeedInput.h b/RecoTracker/LST/interface/LSTPixelSeedInput.h index efbc41d26a913..18d3768b2e0fc 100644 --- a/RecoTracker/LST/interface/LSTPixelSeedInput.h +++ b/RecoTracker/LST/interface/LSTPixelSeedInput.h @@ -38,8 +38,6 @@ class LSTPixelSeedInput { q_(std::move(q)), hitIdx_(std::move(hitIdx)) {} - ~LSTPixelSeedInput() = default; - std::vector const& px() const { return px_; } std::vector const& py() const { return py_; } std::vector const& pz() const { return pz_; } diff --git a/RecoTracker/LSTCore/interface/EndcapGeometry.h b/RecoTracker/LSTCore/interface/EndcapGeometry.h index 
555955d83941c..1a84d89abf90d 100644 --- a/RecoTracker/LSTCore/interface/EndcapGeometry.h +++ b/RecoTracker/LSTCore/interface/EndcapGeometry.h @@ -23,7 +23,6 @@ namespace lst { EndcapGeometry() = default; EndcapGeometry(std::string const& filename); - ~EndcapGeometry() = default; void load(std::string const&); void fillGeoMapArraysExplicit(); diff --git a/RecoTracker/LSTCore/interface/ModuleConnectionMap.h b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h index b3a931345b3a5..1d4445d3b423e 100644 --- a/RecoTracker/LSTCore/interface/ModuleConnectionMap.h +++ b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h @@ -16,7 +16,6 @@ namespace lst { public: ModuleConnectionMap(); ModuleConnectionMap(std::string const& filename); - ~ModuleConnectionMap(); void load(std::string const&); void add(std::string const&); diff --git a/RecoTracker/LSTCore/interface/TiltedGeometry.h b/RecoTracker/LSTCore/interface/TiltedGeometry.h index b70a1d95a357b..420000dd38aa0 100644 --- a/RecoTracker/LSTCore/interface/TiltedGeometry.h +++ b/RecoTracker/LSTCore/interface/TiltedGeometry.h @@ -18,7 +18,6 @@ namespace lst { public: TiltedGeometry() = default; TiltedGeometry(std::string const& filename); - ~TiltedGeometry() = default; void load(std::string const&); diff --git a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc index fe0826bbd80e6..d1b68b7f485bb 100644 --- a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc +++ b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc @@ -4,8 +4,6 @@ lst::ModuleConnectionMap::ModuleConnectionMap() {} lst::ModuleConnectionMap::ModuleConnectionMap(std::string const& filename) { load(filename); } -lst::ModuleConnectionMap::~ModuleConnectionMap() {} - void lst::ModuleConnectionMap::load(std::string const& filename) { moduleConnections_.clear(); From 787dfe1faca472b4dd067388964ca229c24452f0 Mon Sep 17 00:00:00 2001 From: Manos Vourliotis Date: Fri, 16 Aug 2024 10:38:02 -0700 Subject: [PATCH 17/20] Renaming of 
kernels --- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 62 +++++++++---------- RecoTracker/LSTCore/src/alpaka/Hit.h | 4 +- RecoTracker/LSTCore/src/alpaka/Kernels.h | 10 +-- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 28 ++++----- .../LSTCore/src/alpaka/PixelQuintuplet.h | 2 +- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 2 +- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 14 ++--- RecoTracker/LSTCore/src/alpaka/Segment.h | 8 +-- .../LSTCore/src/alpaka/TrackCandidate.h | 14 ++--- RecoTracker/LSTCore/src/alpaka/Triplet.h | 6 +- 10 files changed, 75 insertions(+), 75 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index f9757b0659691..74ad80a633e14 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -191,7 +191,7 @@ void lst::Event::addHitToEvent(std::vector const& x, Vec3D const blocksPerGrid1{1, 1, max_blocks}; WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); - hitLoopKernel hit_loop_kernel; + HitLoopKernel hit_loop_kernel; alpaka::exec(queue, hit_loop_workdiv, hit_loop_kernel, @@ -209,7 +209,7 @@ void lst::Event::addHitToEvent(std::vector const& x, Vec3D const blocksPerGrid2{1, 1, max_blocks}; WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); - moduleRangesKernel module_ranges_kernel; + ModuleRangesKernel module_ranges_kernel; alpaka::exec( queue, module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } @@ -259,7 +259,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; + lst::CreateMDArrayRangesGPU createMDArrayRangesGPU_kernel; alpaka::exec( queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, 
*modulesBuffers_.data(), *rangesInGPU); @@ -282,7 +282,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; + lst::CreateSegmentArrayRanges createSegmentArrayRanges_kernel; alpaka::exec(queue, createSegmentArrayRanges_workDiv, createSegmentArrayRanges_kernel, @@ -355,7 +355,7 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& Vec3D const blocksPerGrid{1, 1, max_blocks}; WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); - addPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; + AddPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; alpaka::exec(queue, addPixelSegmentToEvent_workdiv, addPixelSegmentToEvent_kernel, @@ -386,7 +386,7 @@ void lst::Event::createMiniDoublets() { WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; + lst::CreateMDArrayRangesGPU createMDArrayRangesGPU_kernel; alpaka::exec( queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); @@ -408,7 +408,7 @@ void lst::Event::createMiniDoublets() { WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); - lst::createMiniDoubletsInGPUv2 createMiniDoubletsInGPUv2_kernel; + lst::CreateMiniDoubletsInGPUv2 createMiniDoubletsInGPUv2_kernel; alpaka::exec(queue, createMiniDoubletsInGPUv2_workDiv, createMiniDoubletsInGPUv2_kernel, @@ -419,7 +419,7 @@ void lst::Event::createMiniDoublets() { WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; + lst::AddMiniDoubletRangesToEventExplicit 
addMiniDoubletRangesToEventExplicit_kernel; alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, addMiniDoubletRangesToEventExplicit_kernel, @@ -446,7 +446,7 @@ void lst::Event::createSegmentsWithModuleMap() { WorkDiv3D const createSegmentsInGPUv2_workDiv = createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); - lst::createSegmentsInGPUv2 createSegmentsInGPUv2_kernel; + lst::CreateSegmentsInGPUv2 createSegmentsInGPUv2_kernel; alpaka::exec(queue, createSegmentsInGPUv2_workDiv, createSegmentsInGPUv2_kernel, @@ -457,7 +457,7 @@ void lst::Event::createSegmentsWithModuleMap() { WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; + lst::AddSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, addSegmentRangesToEventExplicit_kernel, @@ -474,7 +474,7 @@ void lst::Event::createTriplets() { if (tripletsInGPU == nullptr) { WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::createTripletArrayRanges createTripletArrayRanges_kernel; + lst::CreateTripletArrayRanges createTripletArrayRanges_kernel; alpaka::exec(queue, createTripletArrayRanges_workDiv, createTripletArrayRanges_kernel, @@ -535,7 +535,7 @@ void lst::Event::createTriplets() { WorkDiv3D const createTripletsInGPUv2_workDiv = createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); - lst::createTripletsInGPUv2 createTripletsInGPUv2_kernel; + lst::CreateTripletsInGPUv2 createTripletsInGPUv2_kernel; alpaka::exec(queue, createTripletsInGPUv2_workDiv, createTripletsInGPUv2_kernel, @@ -549,7 +549,7 @@ void lst::Event::createTriplets() { WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; + 
lst::AddTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, addTripletRangesToEventExplicit_kernel, @@ -575,7 +575,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const crossCleanpT3_workDiv = createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); - lst::crossCleanpT3 crossCleanpT3_kernel; + lst::CrossCleanpT3 crossCleanpT3_kernel; alpaka::exec(queue, crossCleanpT3_workDiv, crossCleanpT3_kernel, @@ -587,7 +587,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); - lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; + lst::AddpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, addpT3asTrackCandidatesInGPU_kernel, @@ -608,7 +608,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); - lst::removeDupQuintupletsInGPUBeforeTC removeDupQuintupletsInGPUBeforeTC_kernel; + lst::RemoveDupQuintupletsInGPUBeforeTC removeDupQuintupletsInGPUBeforeTC_kernel; alpaka::exec(queue, removeDupQuintupletsInGPUBeforeTC_workDiv, removeDupQuintupletsInGPUBeforeTC_kernel, @@ -620,7 +620,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const crossCleanT5_workDiv = createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); - lst::crossCleanT5 crossCleanT5_kernel; + lst::CrossCleanT5 crossCleanT5_kernel; alpaka::exec(queue, crossCleanT5_workDiv, crossCleanT5_kernel, @@ -635,7 +635,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const 
addT5asTrackCandidateInGPU_workDiv = createWorkDiv( blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); - lst::addT5asTrackCandidateInGPU addT5asTrackCandidateInGPU_kernel; + lst::AddT5asTrackCandidateInGPU addT5asTrackCandidateInGPU_kernel; alpaka::exec(queue, addT5asTrackCandidateInGPU_workDiv, addT5asTrackCandidateInGPU_kernel, @@ -650,7 +650,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - lst::checkHitspLS checkHitspLS_kernel; + lst::CheckHitspLS checkHitspLS_kernel; alpaka::exec( queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true); } @@ -660,7 +660,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const crossCleanpLS_workDiv = createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); - lst::crossCleanpLS crossCleanpLS_kernel; + lst::CrossCleanpLS crossCleanpLS_kernel; alpaka::exec(queue, crossCleanpLS_workDiv, crossCleanpLS_kernel, @@ -678,7 +678,7 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); - lst::addpLSasTrackCandidateInGPU addpLSasTrackCandidateInGPU_kernel; + lst::AddpLSasTrackCandidateInGPU addpLSasTrackCandidateInGPU_kernel; alpaka::exec(queue, addpLSasTrackCandidateInGPU_workDiv, addpLSasTrackCandidateInGPU_kernel, @@ -789,7 +789,7 @@ void lst::Event::createPixelTriplets() { WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); - lst::createPixelTripletsInGPUFromMapv2 createPixelTripletsInGPUFromMapv2_kernel; + 
lst::CreatePixelTripletsInGPUFromMapv2 createPixelTripletsInGPUFromMapv2_kernel; alpaka::exec(queue, createPixelTripletsInGPUFromMapv2_workDiv, createPixelTripletsInGPUFromMapv2_kernel, @@ -819,7 +819,7 @@ void lst::Event::createPixelTriplets() { WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); - lst::removeDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel; + lst::RemoveDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel; alpaka::exec(queue, removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, @@ -829,7 +829,7 @@ void lst::Event::createPixelTriplets() { void lst::Event::createQuintuplets() { WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; + lst::CreateEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, createEligibleModulesListForQuintupletsGPU_kernel, @@ -860,7 +860,7 @@ void lst::Event::createQuintuplets() { WorkDiv3D const createQuintupletsInGPUv2_workDiv = createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); - lst::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; + lst::CreateQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; alpaka::exec(queue, createQuintupletsInGPUv2_workDiv, createQuintupletsInGPUv2_kernel, @@ -877,7 +877,7 @@ void lst::Event::createQuintuplets() { WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); - lst::removeDupQuintupletsInGPUAfterBuild removeDupQuintupletsInGPUAfterBuild_kernel; + lst::RemoveDupQuintupletsInGPUAfterBuild removeDupQuintupletsInGPUAfterBuild_kernel; alpaka::exec(queue, 
removeDupQuintupletsInGPUAfterBuild_workDiv, removeDupQuintupletsInGPUAfterBuild_kernel, @@ -887,7 +887,7 @@ void lst::Event::createQuintuplets() { WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); - lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; + lst::AddQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, addQuintupletRangesToEventExplicit_kernel, @@ -907,7 +907,7 @@ void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { WorkDiv3D const checkHitspLS_workDiv = createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - lst::checkHitspLS checkHitspLS_kernel; + lst::CheckHitspLS checkHitspLS_kernel; alpaka::exec( queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false); } @@ -990,7 +990,7 @@ void lst::Event::createPixelQuintuplets() { WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); - lst::createPixelQuintupletsInGPUFromMapv2 createPixelQuintupletsInGPUFromMapv2_kernel; + lst::CreatePixelQuintupletsInGPUFromMapv2 createPixelQuintupletsInGPUFromMapv2_kernel; alpaka::exec(queue, createPixelQuintupletsInGPUFromMapv2_workDiv, createPixelQuintupletsInGPUFromMapv2_kernel, @@ -1010,7 +1010,7 @@ void lst::Event::createPixelQuintuplets() { WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); - lst::removeDupPixelQuintupletsInGPUFromMap removeDupPixelQuintupletsInGPUFromMap_kernel; + lst::RemoveDupPixelQuintupletsInGPUFromMap removeDupPixelQuintupletsInGPUFromMap_kernel; alpaka::exec(queue, removeDupPixelQuintupletsInGPUFromMap_workDiv, removeDupPixelQuintupletsInGPUFromMap_kernel, @@ -1018,7 +1018,7 @@ void 
lst::Event::createPixelQuintuplets() { WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); - lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; + lst::AddpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, addpT5asTrackCandidateInGPU_kernel, diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index 3d43c94c62d40..cb95aa14538f3 100644 --- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -176,7 +176,7 @@ namespace lst { return -1; } - struct moduleRangesKernel { + struct ModuleRangesKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -199,7 +199,7 @@ namespace lst { } }; - struct hitLoopKernel { + struct HitLoopKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t Endcap, // Integer corresponding to endcap in module subdets diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 496a3f2ce0fb2..31f057017a766 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -142,7 +142,7 @@ namespace lst { matched[1] = nMatched; } - struct removeDupQuintupletsInGPUAfterBuild { + struct RemoveDupQuintupletsInGPUAfterBuild { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -192,7 +192,7 @@ namespace lst { } }; - struct removeDupQuintupletsInGPUBeforeTC { + struct RemoveDupQuintupletsInGPUBeforeTC { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Quintuplets quintupletsInGPU, @@ -267,7 +267,7 @@ namespace lst { } }; - struct removeDupPixelTripletsInGPUFromMap { + struct RemoveDupPixelTripletsInGPUFromMap { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::PixelTriplets pixelTripletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -304,7 +304,7 @@ namespace lst 
{ } }; - struct removeDupPixelQuintupletsInGPUFromMap { + struct RemoveDupPixelQuintupletsInGPUFromMap { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::PixelQuintuplets pixelQuintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -331,7 +331,7 @@ namespace lst { } }; - struct checkHitspLS { + struct CheckHitspLS { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 60e1a7428edf5..a31747499e9aa 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -281,7 +281,7 @@ namespace lst { return false; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(struct lst::Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -867,13 +867,13 @@ namespace lst { return alpaka::math::abs(acc, dPhiChange) < miniCut; } - struct createMiniDoubletsInGPUv2 { + struct CreateMiniDoubletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - struct lst::Modules modulesInGPU, - struct lst::Hits hitsInGPU, - struct lst::MiniDoublets mdsInGPU, - struct lst::ObjectRanges rangesInGPU) const { + lst::Modules modulesInGPU, + lst::Hits hitsInGPU, + lst::MiniDoublets mdsInGPU, + lst::ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -964,11 +964,11 @@ namespace lst { } }; - struct createMDArrayRangesGPU { + struct CreateMDArrayRangesGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - struct lst::Modules modulesInGPU, - struct lst::ObjectRanges rangesInGPU) const { + 
lst::Modules modulesInGPU, + lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1060,13 +1060,13 @@ namespace lst { } }; - struct addMiniDoubletRangesToEventExplicit { + struct AddMiniDoubletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - struct lst::Modules modulesInGPU, - struct lst::MiniDoublets mdsInGPU, - struct lst::ObjectRanges rangesInGPU, - struct lst::Hits hitsInGPU) const { + lst::Modules modulesInGPU, + lst::MiniDoublets mdsInGPU, + lst::ObjectRanges rangesInGPU, + lst::Hits hitsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index 06ba169c88bea..12161acc08de0 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -832,7 +832,7 @@ namespace lst { return RMSE; } - struct createPixelQuintupletsInGPUFromMapv2 { + struct CreatePixelQuintupletsInGPUFromMapv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 5cc5cf1d31a7a..0c78efcafc87f 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -925,7 +925,7 @@ namespace lst { return true; }; - struct createPixelTripletsInGPUFromMapv2 { + struct CreatePixelTripletsInGPUFromMapv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 4d957a0a1402e..e930d753dada6 100644 --- 
a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -2185,10 +2185,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, - struct lst::Modules& modulesInGPU, - struct lst::MiniDoublets& mdsInGPU, - struct lst::Segments& segmentsInGPU, - struct lst::Triplets& tripletsInGPU, + lst::Modules& modulesInGPU, + lst::MiniDoublets& mdsInGPU, + lst::Segments& segmentsInGPU, + lst::Triplets& tripletsInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -2534,7 +2534,7 @@ namespace lst { return true; } - struct createQuintupletsInGPUv2 { + struct CreateQuintupletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -2663,7 +2663,7 @@ namespace lst { } }; - struct createEligibleModulesListForQuintupletsGPU { + struct CreateEligibleModulesListForQuintupletsGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -2767,7 +2767,7 @@ namespace lst { } }; - struct addQuintupletRangesToEventExplicit { + struct AddQuintupletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6b44ddfbe24b7..9f9f0a6e025a5 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -701,7 +701,7 @@ namespace lst { } } - struct createSegmentsInGPUv2 { + struct CreateSegmentsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -795,7 +795,7 @@ namespace lst { } }; - struct createSegmentArrayRanges { + struct CreateSegmentArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -899,7 +899,7 @@ namespace lst { } }; - struct addSegmentRangesToEventExplicit { + struct AddSegmentRangesToEventExplicit { template 
ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -924,7 +924,7 @@ namespace lst { } }; - struct addPixelSegmentToEventKernel { + struct AddPixelSegmentToEventKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 0439050e100d2..da24db9a2a0d2 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -204,7 +204,7 @@ namespace lst { return npMatched; } - struct crossCleanpT3 { + struct CrossCleanpT3 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -245,7 +245,7 @@ namespace lst { } }; - struct crossCleanT5 { + struct CrossCleanT5 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -300,7 +300,7 @@ namespace lst { } }; - struct crossCleanpLS { + struct CrossCleanpLS { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -381,7 +381,7 @@ namespace lst { } }; - struct addpT3asTrackCandidatesInGPU { + struct AddpT3asTrackCandidatesInGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, @@ -437,7 +437,7 @@ namespace lst { } }; - struct addT5asTrackCandidateInGPU { + struct AddT5asTrackCandidateInGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, @@ -491,7 +491,7 @@ namespace lst { } }; - struct addpLSasTrackCandidateInGPU { + struct AddpLSasTrackCandidateInGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, @@ -530,7 +530,7 @@ namespace lst { } }; - struct addpT5asTrackCandidateInGPU { + struct AddpT5asTrackCandidateInGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 
c5ac432ebf310..15d48e62c41f4 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -808,7 +808,7 @@ namespace lst { return true; } - struct createTripletsInGPUv2 { + struct CreateTripletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -925,7 +925,7 @@ namespace lst { } }; - struct createTripletArrayRanges { + struct CreateTripletArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, @@ -1028,7 +1028,7 @@ namespace lst { } }; - struct addTripletRangesToEventExplicit { + struct AddTripletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, From 5cce69a3d8140f161e16ac3b2cbbb8706fc26ab7 Mon Sep 17 00:00:00 2001 From: Manos Vourliotis Date: Fri, 16 Aug 2024 11:04:46 -0700 Subject: [PATCH 18/20] Code format and checks --- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index a31747499e9aa..01166fe520348 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -966,9 +966,7 @@ namespace lst { struct CreateMDArrayRangesGPU { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); From 9f6d61262e352d4c05b71824c68ba5849ca22dec Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Fri, 16 Aug 2024 12:55:26 -0700 Subject: [PATCH 19/20] Moved LST and Event classes to ALPAKA_ACCELERATOR_NAMESPACE --- .../plugins/alpaka/LSTModulesDevESProducer.cc | 4 
+- RecoTracker/LST/plugins/alpaka/LSTProducer.cc | 4 +- RecoTracker/LSTCore/interface/LST.h | 184 +++--- .../LSTCore/interface/alpaka/Constants.h | 4 +- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 581 +++++++++--------- RecoTracker/LSTCore/src/alpaka/Event.h | 372 ++++++----- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 117 ++-- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 4 +- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 4 +- RecoTracker/LSTCore/src/alpaka/Segment.h | 4 +- .../LSTCore/src/alpaka/TrackCandidate.h | 4 +- RecoTracker/LSTCore/src/alpaka/Triplet.h | 4 +- RecoTracker/LSTCore/standalone/bin/lst.cc | 18 +- .../standalone/code/core/AccessHelper.cc | 114 ++-- .../standalone/code/core/AccessHelper.h | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 24 +- .../LSTCore/standalone/code/core/trkCore.h | 2 +- .../standalone/code/core/write_lst_ntuple.cc | 54 +- .../standalone/code/core/write_lst_ntuple.h | 2 +- 19 files changed, 734 insertions(+), 768 deletions(-) diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc index 908eb796e581b..c1d815210bd53 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -22,8 +22,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { descriptions.addWithDefaultLabel(desc); } - std::unique_ptr> produce(TrackerRecoGeometryRecord const& iRecord) { - return lst::loadAndFillESHost(); + std::unique_ptr<::lst::LSTESData> produce(TrackerRecoGeometryRecord const& iRecord) { + return ::lst::loadAndFillESHost(); } }; diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc index 18bd7c25a9aec..6365eb9822483 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -87,11 +87,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: edm::EDGetTokenT lstPixelSeedInputToken_; 
edm::EDGetTokenT lstPhase2OTHitsInputToken_; - device::ESGetToken, TrackerRecoGeometryRecord> lstESToken_; + device::ESGetToken<::lst::LSTESData, TrackerRecoGeometryRecord> lstESToken_; const bool verbose_, nopLSDupClean_, tcpLSTriplets_; edm::EDPutTokenT lstOutputToken_; - lst::LST lst_; + lst::LST lst_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/LSTCore/interface/LST.h b/RecoTracker/LSTCore/interface/LST.h index ac23bd09a7ecf..a83399cbd8356 100644 --- a/RecoTracker/LSTCore/interface/LST.h +++ b/RecoTracker/LSTCore/interface/LST.h @@ -8,105 +8,101 @@ #include #include -namespace lst { - template - class Event; +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace lst { + class Event; - template - class LST; + class LST { + public: + LST() = default; - template - class LST { - public: - LST() = default; + void run(Queue& queue, + bool verbose, + ::lst::LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets); + std::vector> const& hits() const { return out_tc_hitIdxs_; } + std::vector const& len() const { return out_tc_len_; } + std::vector const& seedIdx() const { return out_tc_seedIdx_; } + std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } - template - void run(TQueue& queue, - bool verbose, - LSTESData> const* deviceESData, - std::vector const& see_px, - 
std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z, - bool no_pls_dupclean, - bool tc_pls_triplets); - std::vector> const& hits() const { return out_tc_hitIdxs_; } - std::vector const& len() const { return out_tc_len_; } - std::vector const& seedIdx() const { return out_tc_seedIdx_; } - std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } + private: + void prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z); - private: - void prepareInput(std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector 
const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z); + void getOutput(Event& event); + std::vector getHitIdxs(short trackCandidateType, + unsigned int TCIdx, + unsigned int const* TCHitIndices, + unsigned int const* hitIndices); - void getOutput(lst::Event& event); - std::vector getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices); + // Input and output vectors + std::vector in_trkX_; + std::vector in_trkY_; + std::vector in_trkZ_; + std::vector in_hitId_; + std::vector in_hitIdxs_; + std::vector in_hitIndices_vec0_; + std::vector in_hitIndices_vec1_; + std::vector in_hitIndices_vec2_; + std::vector in_hitIndices_vec3_; + std::vector in_deltaPhi_vec_; + std::vector in_ptIn_vec_; + std::vector in_ptErr_vec_; + std::vector in_px_vec_; + std::vector in_py_vec_; + std::vector in_pz_vec_; + std::vector in_eta_vec_; + std::vector in_etaErr_vec_; + std::vector in_phi_vec_; + std::vector in_charge_vec_; + std::vector in_seedIdx_vec_; + std::vector in_superbin_vec_; + std::vector in_pixelType_vec_; + std::vector in_isQuad_vec_; + std::vector> out_tc_hitIdxs_; + std::vector out_tc_len_; + std::vector out_tc_seedIdx_; + std::vector out_tc_trackCandidateType_; + }; - // Input and output vectors - std::vector in_trkX_; - std::vector in_trkY_; - std::vector in_trkZ_; - std::vector in_hitId_; - std::vector in_hitIdxs_; - std::vector in_hitIndices_vec0_; - std::vector in_hitIndices_vec1_; - std::vector in_hitIndices_vec2_; - std::vector in_hitIndices_vec3_; - std::vector in_deltaPhi_vec_; - std::vector in_ptIn_vec_; - std::vector in_ptErr_vec_; - std::vector in_px_vec_; - std::vector in_py_vec_; - std::vector in_pz_vec_; - std::vector in_eta_vec_; - std::vector in_etaErr_vec_; - std::vector in_phi_vec_; - 
std::vector in_charge_vec_; - std::vector in_seedIdx_vec_; - std::vector in_superbin_vec_; - std::vector in_pixelType_vec_; - std::vector in_isQuad_vec_; - std::vector> out_tc_hitIdxs_; - std::vector out_tc_len_; - std::vector out_tc_seedIdx_; - std::vector out_tc_trackCandidateType_; - }; - -} // namespace lst + } // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 14ab5d8efe7f8..459989670ccdd 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -11,7 +11,7 @@ namespace lst { - using namespace ALPAKA_ACCELERATOR_NAMESPACE; + using namespace alpaka_common; // Half precision wrapper functions. #if defined(FP16_Base) @@ -46,7 +46,7 @@ namespace lst { Vec adjustedThreads = threadsPerBlock; // special overrides for CPU/host cases - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { adjustedBlocks = Vec::all(static_cast(1)); if constexpr (alpaka::accMatchesTags) { diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index f9757b0659691..97c4300456c8c 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -2,9 +2,12 @@ #include "Event.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; +using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; +using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; +using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; +using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; -void lst::Event::initSync(bool verbose) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::initSync(bool verbose) { alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; @@ -47,7 +50,7 @@ void lst::Event::initSync(bool verbose) { } } -void lst::Event::resetEventSync() { +void 
ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::resetEventSync() { alpaka::wait(queue); // synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { @@ -154,24 +157,24 @@ void lst::Event::resetEventSync() { } } -void lst::Event::addHitToEvent(std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& detId, - std::vector const& idxInNtuple) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { // Use the actual number of hits instead of a max. unsigned int nHits = x.size(); // Initialize space on device/host for next event. if (hitsInGPU == nullptr) { - hitsInGPU = new lst::Hits(); - hitsBuffers = new lst::HitsBuffer(nModules_, nHits, devAcc, queue); + hitsInGPU = new ::lst::Hits(); + hitsBuffers = new ::lst::HitsBuffer(nModules_, nHits, devAcc, queue); hitsInGPU->setData(*hitsBuffers); } if (rangesInGPU == nullptr) { - rangesInGPU = new lst::ObjectRanges(); - rangesBuffers = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); + rangesInGPU = new ::lst::ObjectRanges(); + rangesBuffers = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); rangesInGPU->setData(*rangesBuffers); } @@ -188,15 +191,14 @@ void lst::Event::addHitToEvent(std::vector const& x, alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; - Vec3D const blocksPerGrid1{1, 1, max_blocks}; - WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); + Vec3D const blocksPerGrid1{1, 1, ::lst::max_blocks}; + WorkDiv3D const hit_loop_workdiv = ::lst::createWorkDiv(blocksPerGrid1, threadsPerBlock1, ::lst::elementsPerThread); - hitLoopKernel hit_loop_kernel; alpaka::exec(queue, hit_loop_workdiv, - hit_loop_kernel, - Endcap, - TwoS, + ::lst::hitLoopKernel{}, + 
::lst::Endcap, + ::lst::TwoS, nModules_, nEndCapMap_, endcapGeometryBuffers_.geoMapDetId_buf.data(), @@ -206,41 +208,41 @@ void lst::Event::addHitToEvent(std::vector const& x, nHits); Vec3D const threadsPerBlock2{1, 1, 256}; - Vec3D const blocksPerGrid2{1, 1, max_blocks}; - WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); + Vec3D const blocksPerGrid2{1, 1, ::lst::max_blocks}; + WorkDiv3D const module_ranges_workdiv = + ::lst::createWorkDiv(blocksPerGrid2, threadsPerBlock2, ::lst::elementsPerThread); - moduleRangesKernel module_ranges_kernel; alpaka::exec( - queue, module_ranges_workdiv, module_ranges_kernel, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); + queue, module_ranges_workdiv, ::lst::moduleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } -void lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, - std::vector const& hitIndices1, - std::vector const& hitIndices2, - std::vector const& hitIndices3, - std::vector const& dPhiChange, - std::vector const& ptIn, - std::vector const& ptErr, - std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& eta, - std::vector const& etaErr, - std::vector const& phi, - std::vector const& charge, - std::vector const& seedIdx, - std::vector const& superbin, - std::vector const& pixelType, - std::vector const& isQuad) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + 
std::vector const& isQuad) { unsigned int size = ptIn.size(); - if (size > n_max_pixel_segments_per_module) { + if (size > ::lst::n_max_pixel_segments_per_module) { printf( "*********************************************************\n" "* Warning: Pixel line segments will be truncated. *\n" "* You need to increase n_max_pixel_segments_per_module. *\n" "*********************************************************\n"); - size = n_max_pixel_segments_per_module; + size = ::lst::n_max_pixel_segments_per_module; } unsigned int mdSize = 2 * size; @@ -253,25 +255,24 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, ::lst::createMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - mdsInGPU = new lst::MiniDoublets(); - miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new 
::lst::MiniDoublets(); + miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_buf_h); @@ -280,12 +281,11 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them - WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createSegmentArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createSegmentArrayRanges createSegmentArrayRanges_kernel; alpaka::exec(queue, createSegmentArrayRanges_workDiv, - createSegmentArrayRanges_kernel, + ::lst::createSegmentArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU); @@ -295,21 +295,21 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments_ += n_max_pixel_segments_per_module; + nTotalSegments_ += ::lst::n_max_pixel_segments_per_module; - segmentsInGPU = new lst::Segments(); - segmentsBuffers = new lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new ::lst::Segments(); + segmentsBuffers = new ::lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); } - auto hitIndices0_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices1_dev = allocBufWrapper(devAcc, size, queue); - auto 
hitIndices2_dev = allocBufWrapper(devAcc, size, queue); - auto hitIndices3_dev = allocBufWrapper(devAcc, size, queue); - auto dPhiChange_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices0_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices1_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices2_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices3_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto dPhiChange_dev = ::lst::allocBufWrapper(devAcc, size, queue); alpaka::memcpy(queue, hitIndices0_dev, hitIndices0, size); alpaka::memcpy(queue, hitIndices1_dev, hitIndices1, size); @@ -352,13 +352,13 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; - Vec3D const blocksPerGrid{1, 1, max_blocks}; - WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + Vec3D const blocksPerGrid{1, 1, ::lst::max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = + ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); - addPixelSegmentToEventKernel addPixelSegmentToEvent_kernel; alpaka::exec(queue, addPixelSegmentToEvent_workdiv, - addPixelSegmentToEvent_kernel, + ::lst::addPixelSegmentToEventKernel{}, *modulesBuffers_.data(), *rangesInGPU, *hitsInGPU, @@ -373,56 +373,53 @@ void lst::Event::addPixelSegmentToEvent(std::vector const& size); } -void lst::Event::createMiniDoublets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createMiniDoublets() { // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = 
cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createMDArrayRangesGPU createMDArrayRangesGPU_kernel; alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, createMDArrayRangesGPU_kernel, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, ::lst::createMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { - mdsInGPU = new lst::MiniDoublets(); - miniDoubletsBuffers = new lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new ::lst::MiniDoublets(); + miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); } Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; Vec3D const blocksPerGridCreateMDInGPU{1, nLowerModules_ / threadsPerBlockCreateMDInGPU[1], 1}; WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, ::lst::elementsPerThread); - lst::createMiniDoubletsInGPUv2 createMiniDoubletsInGPUv2_kernel; alpaka::exec(queue, createMiniDoubletsInGPUv2_workDiv, - 
createMiniDoubletsInGPUv2_kernel, + ::lst::createMiniDoubletsInGPUv2{}, *modulesBuffers_.data(), *hitsInGPU, *mdsInGPU, *rangesInGPU); - WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addMiniDoubletRangesToEventExplicit addMiniDoubletRangesToEventExplicit_kernel; alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, - addMiniDoubletRangesToEventExplicit_kernel, + ::lst::addMiniDoubletRangesToEventExplicit{}, *modulesBuffers_.data(), *mdsInGPU, *rangesInGPU, @@ -433,34 +430,32 @@ void lst::Event::createMiniDoublets() { } } -void lst::Event::createSegmentsWithModuleMap() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { - segmentsInGPU = new lst::Segments(); - segmentsBuffers = new lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new ::lst::Segments(); + segmentsBuffers = new ::lst::SegmentsBuffer( + nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; WorkDiv3D const createSegmentsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, ::lst::elementsPerThread); - lst::createSegmentsInGPUv2 createSegmentsInGPUv2_kernel; alpaka::exec(queue, createSegmentsInGPUv2_workDiv, - createSegmentsInGPUv2_kernel, + ::lst::createSegmentsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *rangesInGPU); - WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = 
::lst::createWorkDiv({1}, {1024}, {1}); - lst::addSegmentRangesToEventExplicit addSegmentRangesToEventExplicit_kernel; alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, - addSegmentRangesToEventExplicit_kernel, + ::lst::addSegmentRangesToEventExplicit{}, *modulesBuffers_.data(), *segmentsInGPU, *rangesInGPU); @@ -470,14 +465,13 @@ void lst::Event::createSegmentsWithModuleMap() { } } -void lst::Event::createTriplets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { if (tripletsInGPU == nullptr) { - WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createTripletArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createTripletArrayRanges createTripletArrayRanges_kernel; alpaka::exec(queue, createTripletArrayRanges_workDiv, - createTripletArrayRanges_kernel, + ::lst::createTripletArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *segmentsInGPU); @@ -488,8 +482,8 @@ void lst::Event::createTriplets() { alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); alpaka::wait(queue); // wait to get the value before using it - tripletsInGPU = new lst::Triplets(); - tripletsBuffers = new lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); + tripletsInGPU = new ::lst::Triplets(); + tripletsBuffers = new ::lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); @@ -527,18 +521,17 @@ void lst::Event::createTriplets() { } // Allocate and copy to device index - auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); + auto index_gpu_buf = ::lst::allocBufWrapper(devAcc, nLowerModules_, queue); alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; - Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; + Vec3D 
const blocksPerGridCreateTrip{::lst::max_blocks, 1, 1}; WorkDiv3D const createTripletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, ::lst::elementsPerThread); - lst::createTripletsInGPUv2 createTripletsInGPUv2_kernel; alpaka::exec(queue, createTripletsInGPUv2_workDiv, - createTripletsInGPUv2_kernel, + ::lst::createTripletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -547,12 +540,11 @@ void lst::Event::createTriplets() { index_gpu_buf.data(), nonZeroModules); - WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addTripletRangesToEventExplicit addTripletRangesToEventExplicit_kernel; alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, - addTripletRangesToEventExplicit_kernel, + ::lst::addTripletRangesToEventExplicit{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); @@ -562,35 +554,33 @@ void lst::Event::createTriplets() { } } -void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new lst::TrackCandidates(); - trackCandidatesBuffers = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new ::lst::TrackCandidates(); + trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const 
crossCleanpT3_workDiv = - createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, ::lst::elementsPerThread); - lst::crossCleanpT3 crossCleanpT3_kernel; alpaka::exec(queue, crossCleanpT3_workDiv, - crossCleanpT3_kernel, + ::lst::crossCleanpT3{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, *segmentsInGPU, *pixelQuintupletsInGPU); - WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = ::lst::createWorkDiv({1}, {512}, {1}); - lst::addpT3asTrackCandidatesInGPU addpT3asTrackCandidatesInGPU_kernel; alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, - addpT3asTrackCandidatesInGPU_kernel, + ::lst::addpT3asTrackCandidatesInGPU{}, nLowerModules_, *pixelTripletsInGPU, *trackCandidatesInGPU, @@ -606,24 +596,22 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = - createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, ::lst::elementsPerThread); - lst::removeDupQuintupletsInGPUBeforeTC removeDupQuintupletsInGPUBeforeTC_kernel; alpaka::exec(queue, removeDupQuintupletsInGPUBeforeTC_workDiv, - removeDupQuintupletsInGPUBeforeTC_kernel, + ::lst::removeDupQuintupletsInGPUBeforeTC{}, *quintupletsInGPU, *rangesInGPU); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; - Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, ::lst::max_blocks}; WorkDiv3D const crossCleanT5_workDiv = - 
createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, ::lst::elementsPerThread); - lst::crossCleanT5 crossCleanT5_kernel; alpaka::exec(queue, crossCleanT5_workDiv, - crossCleanT5_kernel, + ::lst::crossCleanT5{}, *modulesBuffers_.data(), *quintupletsInGPU, *pixelQuintupletsInGPU, @@ -632,13 +620,12 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; - WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( - blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); + WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( + blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, ::lst::elementsPerThread); - lst::addT5asTrackCandidateInGPU addT5asTrackCandidateInGPU_kernel; alpaka::exec(queue, addT5asTrackCandidateInGPU_workDiv, - addT5asTrackCandidateInGPU_kernel, + ::lst::addT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU, *trackCandidatesInGPU, @@ -646,24 +633,22 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); - lst::checkHitspLS checkHitspLS_kernel; alpaka::exec( - queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, true); + queue, 
checkHitspLS_workDiv, ::lst::checkHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; WorkDiv3D const crossCleanpLS_workDiv = - createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, ::lst::elementsPerThread); - lst::crossCleanpLS crossCleanpLS_kernel; alpaka::exec(queue, crossCleanpLS_workDiv, - crossCleanpLS_kernel, + ::lst::crossCleanpLS{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, @@ -674,24 +659,23 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ *quintupletsInGPU); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; - Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; - WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( - blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); + Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, ::lst::max_blocks}; + WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( + blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, ::lst::elementsPerThread); - lst::addpLSasTrackCandidateInGPU addpLSasTrackCandidateInGPU_kernel; alpaka::exec(queue, addpLSasTrackCandidateInGPU_workDiv, - addpLSasTrackCandidateInGPU_kernel, + ::lst::addpLSasTrackCandidateInGPU{}, nLowerModules_, *trackCandidatesInGPU, *segmentsInGPU, tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached - auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanpT3Host_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanpLSHost_buf = allocBufWrapper(devHost, 1, queue); - auto nTrackCanT5Host_buf = allocBufWrapper(devHost, 1, queue); + 
auto nTrackCanpT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT3Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanpLSHost_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nTrackCanpT5Host_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -702,26 +686,27 @@ void lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_ auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); - if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || - (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == ::lst::n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == ::lst::n_max_nonpixel_track_candidates)) { printf( "****************************************************************************************************\n" "* Warning: Track candidates were possibly truncated. *\n" - "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" + "* You may need to increase either ::lst::n_max_pixel_track_candidates or " + "::lst::n_max_nonpixel_track_candidates. *\n" "* Run the code with the WARNINGS flag activated for more details. 
*\n" "****************************************************************************************************\n"); } } -void lst::Event::createPixelTriplets() { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { if (pixelTripletsInGPU == nullptr) { - pixelTripletsInGPU = new lst::PixelTriplets(); - pixelTripletsBuffers = new lst::PixelTripletsBuffer(n_max_pixel_triplets, devAcc, queue); + pixelTripletsInGPU = new ::lst::PixelTriplets(); + pixelTripletsBuffers = new ::lst::PixelTripletsBuffer(::lst::n_max_pixel_triplets, devAcc, queue); pixelTripletsInGPU->setData(*pixelTripletsBuffers); } - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -737,25 +722,25 @@ void lst::Event::createPixelTriplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto 
connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = - pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel // the current selection still leaves a significant fraction of unmatchable pLSs for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelSize_host[i] = 0; connectedPixelIndex_host[i] = 0; continue; @@ -787,12 +772,11 @@ void lst::Event::createPixelTriplets() { Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = - createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); - lst::createPixelTripletsInGPUFromMapv2 
createPixelTripletsInGPUFromMapv2_kernel; alpaka::exec(queue, createPixelTripletsInGPUFromMapv2_workDiv, - createPixelTripletsInGPUFromMapv2_kernel, + ::lst::createPixelTripletsInGPUFromMapv2{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU, @@ -804,7 +788,7 @@ void lst::Event::createPixelTriplets() { nInnerSegments); #ifdef WARNINGS - auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -817,28 +801,26 @@ void lst::Event::createPixelTriplets() { //seems like more blocks lead to conflicting writes Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = - createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, ::lst::elementsPerThread); - lst::removeDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel; alpaka::exec(queue, removeDupPixelTripletsInGPUFromMap_workDiv, - removeDupPixelTripletsInGPUFromMap_kernel, + ::lst::removeDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU); } -void lst::Event::createQuintuplets() { - WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { + WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::createEligibleModulesListForQuintupletsGPU createEligibleModulesListForQuintupletsGPU_kernel; alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, - createEligibleModulesListForQuintupletsGPU_kernel, + ::lst::createEligibleModulesListForQuintupletsGPU{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); - auto nEligibleT5Modules_buf = 
allocBufWrapper(devHost, 1, queue); - auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); + auto nEligibleT5Modules_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTotalQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); @@ -848,8 +830,8 @@ void lst::Event::createQuintuplets() { auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (quintupletsInGPU == nullptr) { - quintupletsInGPU = new lst::Quintuplets(); - quintupletsBuffers = new lst::QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); + quintupletsInGPU = new ::lst::Quintuplets(); + quintupletsBuffers = new ::lst::QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); @@ -858,12 +840,11 @@ void lst::Event::createQuintuplets() { Vec3D const threadsPerBlockQuints{1, 8, 32}; Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; WorkDiv3D const createQuintupletsInGPUv2_workDiv = - createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, ::lst::elementsPerThread); - lst::createQuintupletsInGPUv2 createQuintupletsInGPUv2_kernel; alpaka::exec(queue, createQuintupletsInGPUv2_workDiv, - createQuintupletsInGPUv2_kernel, + ::lst::createQuintupletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -873,24 +854,22 @@ void lst::Event::createQuintuplets() { nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; - Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; + Vec3D const blocksPerGridDupQuint{::lst::max_blocks, 1, 1}; WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = - 
createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, ::lst::elementsPerThread); - lst::removeDupQuintupletsInGPUAfterBuild removeDupQuintupletsInGPUAfterBuild_kernel; alpaka::exec(queue, removeDupQuintupletsInGPUAfterBuild_workDiv, - removeDupQuintupletsInGPUAfterBuild_kernel, + ::lst::removeDupQuintupletsInGPUAfterBuild{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); - WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); - lst::addQuintupletRangesToEventExplicit addQuintupletRangesToEventExplicit_kernel; alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, - addQuintupletRangesToEventExplicit_kernel, + ::lst::addQuintupletRangesToEventExplicit{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); @@ -900,34 +879,33 @@ void lst::Event::createQuintuplets() { } } -void lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); - lst::checkHitspLS checkHitspLS_kernel; alpaka::exec( - queue, checkHitspLS_workDiv, checkHitspLS_kernel, *modulesBuffers_.data(), *segmentsInGPU, false); + queue, checkHitspLS_workDiv, ::lst::checkHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, false); } } -void lst::Event::createPixelQuintuplets() { 
+void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { if (pixelQuintupletsInGPU == nullptr) { - pixelQuintupletsInGPU = new lst::PixelQuintuplets(); - pixelQuintupletsBuffers = new lst::PixelQuintupletsBuffer(n_max_pixel_quintuplets, devAcc, queue); + pixelQuintupletsInGPU = new ::lst::PixelQuintuplets(); + pixelQuintupletsBuffers = new ::lst::PixelQuintupletsBuffer(::lst::n_max_pixel_quintuplets, devAcc, queue); pixelQuintupletsInGPU->setData(*pixelQuintupletsBuffers); } if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new lst::TrackCandidates(); - trackCandidatesBuffers = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new ::lst::TrackCandidates(); + trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -943,24 +921,24 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, 
nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = - pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; // Loop over # pLS for (unsigned int i = 0; i < nInnerSegments; i++) { int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelIndex_host[i] = 0; connectedPixelSize_host[i] = 0; continue; @@ -986,14 +964,13 @@ void lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; - Vec3D const 
blocksPerGridCreatePixQuints{16, max_blocks, 1}; + Vec3D const blocksPerGridCreatePixQuints{16, ::lst::max_blocks, 1}; WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = - createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, ::lst::elementsPerThread); - lst::createPixelQuintupletsInGPUFromMapv2 createPixelQuintupletsInGPUFromMapv2_kernel; alpaka::exec(queue, createPixelQuintupletsInGPUFromMapv2_workDiv, - createPixelQuintupletsInGPUFromMapv2_kernel, + ::lst::createPixelQuintupletsInGPUFromMapv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -1006,22 +983,20 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU); Vec3D const threadsPerBlockDupPix{1, 16, 16}; - Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; + Vec3D const blocksPerGridDupPix{1, ::lst::max_blocks, 1}; WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = - createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); + ::lst::createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, ::lst::elementsPerThread); - lst::removeDupPixelQuintupletsInGPUFromMap removeDupPixelQuintupletsInGPUFromMap_kernel; alpaka::exec(queue, removeDupPixelQuintupletsInGPUFromMap_workDiv, - removeDupPixelQuintupletsInGPUFromMap_kernel, + ::lst::removeDupPixelQuintupletsInGPUFromMap{}, *pixelQuintupletsInGPU); - WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); + WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv({1}, {256}, {1}); - lst::addpT5asTrackCandidateInGPU addpT5asTrackCandidateInGPU_kernel; alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, - addpT5asTrackCandidateInGPU_kernel, + ::lst::addpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU, *trackCandidatesInGPU, @@ -1029,7 +1004,7 @@ void lst::Event::createPixelQuintuplets() { *rangesInGPU); #ifdef 
WARNINGS - auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); + auto nPixelQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -1038,18 +1013,18 @@ void lst::Event::createPixelQuintuplets() { #endif } -void lst::Event::addMiniDoubletsToEventExplicit() { - auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addMiniDoubletsToEventExplicit() { + auto nMDsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); + auto module_hitRanges_buf = ::lst::allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); alpaka::wait(queue); // wait for inputs before using them @@ -1061,7 +1036,7 @@ void lst::Event::addMiniDoubletsToEventExplicit() { for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_minidoublets_by_layer_barrel_[module_layers[i] - 1] += nMDsCPU[i]; } else { n_minidoublets_by_layer_endcap_[module_layers[i] - 1] 
+= nMDsCPU[i]; @@ -1070,15 +1045,15 @@ void lst::Event::addMiniDoubletsToEventExplicit() { } } -void lst::Event::addSegmentsToEventExplicit() { - auto nSegmentsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addSegmentsToEventExplicit() { + auto nSegmentsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1089,7 +1064,7 @@ void lst::Event::addSegmentsToEventExplicit() { for (unsigned int i = 0; i < nLowerModules_; i++) { if (!(nSegmentsCPU[i] == 0)) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_segments_by_layer_barrel_[module_layers[i] - 1] += nSegmentsCPU[i]; } else { n_segments_by_layer_endcap_[module_layers[i] - 1] += nSegmentsCPU[i]; @@ -1098,18 +1073,18 @@ void lst::Event::addSegmentsToEventExplicit() { } } -void lst::Event::addQuintupletsToEventExplicit() { - auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addQuintupletsToEventExplicit() { + auto nQuintupletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); 
+ auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_quintupletModuleIndices_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf); alpaka::wait(queue); // wait for inputs before using them @@ -1121,7 +1096,7 @@ void lst::Event::addQuintupletsToEventExplicit() { for (uint16_t i = 0; i < nLowerModules_; i++) { if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_quintuplets_by_layer_barrel_[module_layers[i] - 1] += nQuintupletsCPU[i]; } else { n_quintuplets_by_layer_endcap_[module_layers[i] - 1] += nQuintupletsCPU[i]; @@ -1130,15 +1105,15 @@ void lst::Event::addQuintupletsToEventExplicit() { } } -void lst::Event::addTripletsToEventExplicit() { - auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addTripletsToEventExplicit() { + auto nTripletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); + 
auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1149,7 +1124,7 @@ void lst::Event::addTripletsToEventExplicit() { for (uint16_t i = 0; i < nLowerModules_; i++) { if (nTripletsCPU[i] != 0) { - if (module_subdets[i] == Barrel) { + if (module_subdets[i] == ::lst::Barrel) { n_triplets_by_layer_barrel_[module_layers[i] - 1] += nTripletsCPU[i]; } else { n_triplets_by_layer_endcap_[module_layers[i] - 1] += nTripletsCPU[i]; @@ -1158,7 +1133,7 @@ void lst::Event::addTripletsToEventExplicit() { } } -unsigned int lst::Event::getNumberOfHits() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHits() { unsigned int hits = 0; for (auto& it : n_hits_by_layer_barrel_) { hits += it; @@ -1170,22 +1145,22 @@ unsigned int lst::Event::getNumberOfHits() { return hits; } -unsigned int lst::Event::getNumberOfHitsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayer(unsigned int layer) { if (layer == 6) return n_hits_by_layer_barrel_[layer]; else return n_hits_by_layer_barrel_[layer] + n_hits_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { return n_hits_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { return n_hits_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfMiniDoublets() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoublets() { unsigned int miniDoublets = 0; for (auto& it : n_minidoublets_by_layer_barrel_) { miniDoublets += it; @@ -1197,22 +1172,22 @@ unsigned int 
lst::Event::getNumberOfMiniDoublets() { return miniDoublets; } -unsigned int lst::Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { if (layer == 6) return n_minidoublets_by_layer_barrel_[layer]; else return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { return n_minidoublets_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { return n_minidoublets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfSegments() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegments() { unsigned int segments = 0; for (auto& it : n_segments_by_layer_barrel_) { segments += it; @@ -1224,22 +1199,22 @@ unsigned int lst::Event::getNumberOfSegments() { return segments; } -unsigned int lst::Event::getNumberOfSegmentsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayer(unsigned int layer) { if (layer == 6) return n_segments_by_layer_barrel_[layer]; else return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { return n_segments_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { return 
n_segments_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfTriplets() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTriplets() { unsigned int triplets = 0; for (auto& it : n_triplets_by_layer_barrel_) { triplets += it; @@ -1251,22 +1226,22 @@ unsigned int lst::Event::getNumberOfTriplets() { return triplets; } -unsigned int lst::Event::getNumberOfTripletsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayer(unsigned int layer) { if (layer == 6) return n_triplets_by_layer_barrel_[layer]; else return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { return n_triplets_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { return n_triplets_by_layer_endcap_[layer]; } -int lst::Event::getNumberOfPixelTriplets() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTriplets() { auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); @@ -1274,7 +1249,7 @@ int lst::Event::getNumberOfPixelTriplets() { return *nPixelTriplets_buf_h.data(); } -int lst::Event::getNumberOfPixelQuintuplets() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelQuintuplets() { auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); @@ -1282,7 +1257,7 @@ int lst::Event::getNumberOfPixelQuintuplets() { return *nPixelQuintuplets_buf_h.data(); } -unsigned int 
lst::Event::getNumberOfQuintuplets() { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintuplets() { unsigned int quintuplets = 0; for (auto& it : n_quintuplets_by_layer_barrel_) { quintuplets += it; @@ -1294,22 +1269,22 @@ unsigned int lst::Event::getNumberOfQuintuplets() { return quintuplets; } -unsigned int lst::Event::getNumberOfQuintupletsByLayer(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayer(unsigned int layer) { if (layer == 6) return n_quintuplets_by_layer_barrel_[layer]; else return n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer]; } -unsigned int lst::Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { return n_quintuplets_by_layer_barrel_[layer]; } -unsigned int lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { +unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { return n_quintuplets_by_layer_endcap_[layer]; } -int lst::Event::getNumberOfTrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); @@ -1317,7 +1292,7 @@ int lst::Event::getNumberOfTrackCandidates() { return *nTrackCandidates_buf_h.data(); } -int lst::Event::getNumberOfPT5TrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); @@ -1326,7 +1301,7 @@ int lst::Event::getNumberOfPT5TrackCandidates() { return *nTrackCandidatesPT5_buf_h.data(); } -int 
lst::Event::getNumberOfPT3TrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); @@ -1334,7 +1309,7 @@ int lst::Event::getNumberOfPT3TrackCandidates() { return *nTrackCandidatesPT3_buf_h.data(); } -int lst::Event::getNumberOfPLSTrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -1342,7 +1317,7 @@ int lst::Event::getNumberOfPLSTrackCandidates() { return *nTrackCandidatesPLS_buf_h.data(); } -int lst::Event::getNumberOfPixelTrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1352,7 +1327,7 @@ int lst::Event::getNumberOfPixelTrackCandidates() { return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } -int lst::Event::getNumberOfT5TrackCandidates() { +int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); @@ -1360,7 +1335,8 @@ int lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection +lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHits( + bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto 
nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1368,7 +1344,7 @@ lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_p alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1384,14 +1360,14 @@ lst::HitsBuffer* lst::Event::getHits(bool sync) //std::shared_p return hitsInCPU; } -lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { +lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, hitsBuffers->nHits_buf); alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1402,9 +1378,9 @@ lst::HitsBuffer* lst::Event::getHitsInCMSSW(bool sync) { return hitsInCPU; } -lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { +lst::ObjectRangesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { - rangesInCPU = new lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); + rangesInCPU = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); alpaka::memcpy(queue, rangesInCPU->hitRanges_buf, rangesBuffers->hitRanges_buf); @@ -1418,7 +1394,7 @@ lst::ObjectRangesBuffer* lst::Event::getRanges(bool sync) { return rangesInCPU; } -lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) { +lst::MiniDoubletsBuffer* 
ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1426,7 +1402,7 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - mdsInCPU = new lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); + mdsInCPU = new ::lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1441,7 +1417,7 @@ lst::MiniDoubletsBuffer* lst::Event::getMiniDoublets(bool sync) return mdsInCPU; } -lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { +lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1449,8 +1425,8 @@ lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU = - new lst::SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); + segmentsInCPU = new ::lst::SegmentsBuffer( + nMemHost, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1478,7 +1454,7 @@ lst::SegmentsBuffer* lst::Event::getSegments(bool sync) { return segmentsInCPU; } -lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { +lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto 
nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1486,7 +1462,7 @@ lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU = new lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); + tripletsInCPU = new ::lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1501,9 +1477,12 @@ lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { alpaka::memcpy(queue, tripletsInCPU->rtLo_buf, tripletsBuffers->rtLo_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->rtHi_buf, tripletsBuffers->rtHi_buf, nMemHost); #endif - alpaka::memcpy(queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, Params_T3::kHits * nMemHost); alpaka::memcpy( - queue, tripletsInCPU->logicalLayers_buf, tripletsBuffers->logicalLayers_buf, Params_T3::kLayers * nMemHost); + queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, ::lst::Params_T3::kHits * nMemHost); + alpaka::memcpy(queue, + tripletsInCPU->logicalLayers_buf, + tripletsBuffers->logicalLayers_buf, + ::lst::Params_T3::kLayers * nMemHost); alpaka::memcpy(queue, tripletsInCPU->segmentIndices_buf, tripletsBuffers->segmentIndices_buf, 2 * nMemHost); alpaka::memcpy(queue, tripletsInCPU->betaIn_buf, tripletsBuffers->betaIn_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); @@ -1515,7 +1494,7 @@ lst::TripletsBuffer* lst::Event::getTriplets(bool sync) { return tripletsInCPU; } -lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { +lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = 
cms::alpakatools::make_host_buffer(queue, 1u); @@ -1523,7 +1502,7 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU = new lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); + quintupletsInCPU = new ::lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1534,7 +1513,7 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { alpaka::memcpy(queue, quintupletsInCPU->lowerModuleIndices_buf, quintupletsBuffers->lowerModuleIndices_buf, - Params_T5::kLayers * nMemHost); + ::lst::Params_T5::kLayers * nMemHost); alpaka::memcpy(queue, quintupletsInCPU->innerRadius_buf, quintupletsBuffers->innerRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->bridgeRadius_buf, quintupletsBuffers->bridgeRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->outerRadius_buf, quintupletsBuffers->outerRadius_buf, nMemHost); @@ -1552,7 +1531,7 @@ lst::QuintupletsBuffer* lst::Event::getQuintuplets(bool sync) { return quintupletsInCPU; } -lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync) { +lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1560,7 +1539,7 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync alpaka::wait(queue); // wait for the value before using auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU = new lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); + pixelTripletsInCPU = new ::lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); 
*pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; @@ -1592,7 +1571,8 @@ lst::PixelTripletsBuffer* lst::Event::getPixelTriplets(bool sync return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(bool sync) { +lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelQuintuplets( + bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1600,7 +1580,7 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(boo alpaka::wait(queue); // wait for the value before using auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU = new lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); + pixelQuintupletsInCPU = new ::lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; @@ -1629,7 +1609,8 @@ lst::PixelQuintupletsBuffer* lst::Event::getPixelQuintuplets(boo return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool sync) { +lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidates( + bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1637,21 +1618,21 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool alpaka::wait(queue); auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + 
::lst::n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + ::lst::Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->logicalLayers_buf, trackCandidatesBuffers->logicalLayers_buf, - Params_pT5::kLayers * nTrackCanHost); + ::lst::Params_pT5::kLayers * nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->directObjectIndices_buf, trackCandidatesBuffers->directObjectIndices_buf, @@ -1668,7 +1649,8 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidates(bool return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSSW(bool sync) { +lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidatesInCMSSW( + bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1676,15 +1658,15 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS alpaka::wait(queue); // wait for the value before using auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new lst::TrackCandidatesBuffer( - n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( + ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, 
trackCandidatesBuffers->hitIndices_buf, - Params_pT5::kHits * nTrackCanHost); + ::lst::Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, @@ -1697,10 +1679,11 @@ lst::TrackCandidatesBuffer* lst::Event::getTrackCandidatesInCMSS return trackCandidatesInCPU; } -lst::ModulesBuffer* lst::Event::getModules(bool isFull, bool sync) { +lst::ModulesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getModules(bool isFull, + bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. - modulesInCPU = new lst::ModulesBuffer(devHost, nModules_, nPixels_); + modulesInCPU = new ::lst::ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); if (sync) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 64365bb58bfa8..3c3549f96d41e 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -17,192 +17,188 @@ #include "HeterogeneousCore/AlpakaInterface/interface/host.h" -namespace lst { - - using namespace ALPAKA_ACCELERATOR_NAMESPACE; - - template - class Event; - - template <> - class Event { - private: - Queue queue; - Device devAcc; - DevHost devHost; - bool addObjects; - - std::array n_hits_by_layer_barrel_; - std::array n_hits_by_layer_endcap_; - std::array n_minidoublets_by_layer_barrel_; - std::array n_minidoublets_by_layer_endcap_; - std::array n_segments_by_layer_barrel_; - std::array n_segments_by_layer_endcap_; - std::array n_triplets_by_layer_barrel_; - std::array n_triplets_by_layer_endcap_; - std::array n_trackCandidates_by_layer_barrel_; - std::array n_trackCandidates_by_layer_endcap_; - std::array n_quintuplets_by_layer_barrel_; - std::array n_quintuplets_by_layer_endcap_; - unsigned int nTotalSegments_; - - //Device stuff - ObjectRanges* rangesInGPU; 
- ObjectRangesBuffer* rangesBuffers; - Hits* hitsInGPU; - HitsBuffer* hitsBuffers; - MiniDoublets* mdsInGPU; - MiniDoubletsBuffer* miniDoubletsBuffers; - Segments* segmentsInGPU; - SegmentsBuffer* segmentsBuffers; - Triplets* tripletsInGPU; - TripletsBuffer* tripletsBuffers; - Quintuplets* quintupletsInGPU; - QuintupletsBuffer* quintupletsBuffers; - TrackCandidates* trackCandidatesInGPU; - TrackCandidatesBuffer* trackCandidatesBuffers; - PixelTriplets* pixelTripletsInGPU; - PixelTripletsBuffer* pixelTripletsBuffers; - PixelQuintuplets* pixelQuintupletsInGPU; - PixelQuintupletsBuffer* pixelQuintupletsBuffers; - - //CPU interface stuff - ObjectRangesBuffer* rangesInCPU; - HitsBuffer* hitsInCPU; - MiniDoubletsBuffer* mdsInCPU; - SegmentsBuffer* segmentsInCPU; - TripletsBuffer* tripletsInCPU; - TrackCandidatesBuffer* trackCandidatesInCPU; - ModulesBuffer* modulesInCPU; - QuintupletsBuffer* quintupletsInCPU; - PixelTripletsBuffer* pixelTripletsInCPU; - PixelQuintupletsBuffer* pixelQuintupletsInCPU; - - void initSync(bool verbose); - - int* superbinCPU; - int8_t* pixelTypeCPU; - - const uint16_t nModules_; - const uint16_t nLowerModules_; - const unsigned int nPixels_; - const unsigned int nEndCapMap_; - ModulesBuffer const& modulesBuffers_; - PixelMap const& pixelMapping_; - EndcapGeometryBuffer const& endcapGeometryBuffers_; - - public: - // Constructor used for CMSSW integration. Uses an external queue. 
- template - Event(bool verbose, TQueue const& q, const LSTESData* deviceESData) - : queue(q), - devAcc(alpaka::getDev(q)), - devHost(cms::alpakatools::host()), - nModules_(deviceESData->nModules), - nLowerModules_(deviceESData->nLowerModules), - nPixels_(deviceESData->nPixels), - nEndCapMap_(deviceESData->nEndCapMap), - modulesBuffers_(deviceESData->modulesBuffers), - pixelMapping_(*deviceESData->pixelMapping), - endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { - initSync(verbose); - } - void resetEventSync(); // synchronizes - void wait() const { alpaka::wait(queue); } - - // Calls the appropriate hit function, then increments the counter - void addHitToEvent(std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& detId, - std::vector const& idxInNtuple); - void addPixelSegmentToEvent(std::vector const& hitIndices0, - std::vector const& hitIndices1, - std::vector const& hitIndices2, - std::vector const& hitIndices3, - std::vector const& dPhiChange, - std::vector const& ptIn, - std::vector const& ptErr, - std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& eta, - std::vector const& etaErr, - std::vector const& phi, - std::vector const& charge, - std::vector const& seedIdx, - std::vector const& superbin, - std::vector const& pixelType, - std::vector const& isQuad); - - void createMiniDoublets(); - void createSegmentsWithModuleMap(); - void createTriplets(); - void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); - void createPixelTriplets(); - void createQuintuplets(); - void pixelLineSegmentCleaning(bool no_pls_dupclean); - void createPixelQuintuplets(); - - // functions that map the objects to the appropriate modules - void addMiniDoubletsToEventExplicit(); - void addSegmentsToEventExplicit(); - void addQuintupletsToEventExplicit(); - void addTripletsToEventExplicit(); - void resetObjectsInModule(); - - unsigned int getNumberOfHits(); - unsigned int 
getNumberOfHitsByLayer(unsigned int layer); - unsigned int getNumberOfHitsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfHitsByLayerEndcap(unsigned int layer); - - unsigned int getNumberOfMiniDoublets(); - unsigned int getNumberOfMiniDoubletsByLayer(unsigned int layer); - unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); - - unsigned int getNumberOfSegments(); - unsigned int getNumberOfSegmentsByLayer(unsigned int layer); - unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); - - unsigned int getNumberOfTriplets(); - unsigned int getNumberOfTripletsByLayer(unsigned int layer); - unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); - - int getNumberOfPixelTriplets(); - int getNumberOfPixelQuintuplets(); - - unsigned int getNumberOfQuintuplets(); - unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); - unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); - unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); - - int getNumberOfTrackCandidates(); - int getNumberOfPT5TrackCandidates(); - int getNumberOfPT3TrackCandidates(); - int getNumberOfPLSTrackCandidates(); - int getNumberOfPixelTrackCandidates(); - int getNumberOfT5TrackCandidates(); - - // sync adds alpaka::wait at the end of filling a buffer during lazy fill - // (has no effect on repeated calls) - // set to false may allow faster operation with concurrent calls of get* - // HANDLE WITH CARE - HitsBuffer* getHits(bool sync = true); - HitsBuffer* getHitsInCMSSW(bool sync = true); - ObjectRangesBuffer* getRanges(bool sync = true); - MiniDoubletsBuffer* getMiniDoublets(bool sync = true); - SegmentsBuffer* getSegments(bool sync = true); - TripletsBuffer* getTriplets(bool sync = true); - 
QuintupletsBuffer* getQuintuplets(bool sync = true); - PixelTripletsBuffer* getPixelTriplets(bool sync = true); - PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); - TrackCandidatesBuffer* getTrackCandidates(bool sync = true); - TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); - ModulesBuffer* getModules(bool isFull = false, bool sync = true); - }; - -} // namespace lst +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace lst { + + class Event { + private: + Queue queue; + Device devAcc; + DevHost devHost; + bool addObjects; + + std::array n_hits_by_layer_barrel_; + std::array n_hits_by_layer_endcap_; + std::array n_minidoublets_by_layer_barrel_; + std::array n_minidoublets_by_layer_endcap_; + std::array n_segments_by_layer_barrel_; + std::array n_segments_by_layer_endcap_; + std::array n_triplets_by_layer_barrel_; + std::array n_triplets_by_layer_endcap_; + std::array n_trackCandidates_by_layer_barrel_; + std::array n_trackCandidates_by_layer_endcap_; + std::array n_quintuplets_by_layer_barrel_; + std::array n_quintuplets_by_layer_endcap_; + unsigned int nTotalSegments_; + + //Device stuff + ::lst::ObjectRanges* rangesInGPU; + ::lst::ObjectRangesBuffer* rangesBuffers; + ::lst::Hits* hitsInGPU; + ::lst::HitsBuffer* hitsBuffers; + ::lst::MiniDoublets* mdsInGPU; + ::lst::MiniDoubletsBuffer* miniDoubletsBuffers; + ::lst::Segments* segmentsInGPU; + ::lst::SegmentsBuffer* segmentsBuffers; + ::lst::Triplets* tripletsInGPU; + ::lst::TripletsBuffer* tripletsBuffers; + ::lst::Quintuplets* quintupletsInGPU; + ::lst::QuintupletsBuffer* quintupletsBuffers; + ::lst::TrackCandidates* trackCandidatesInGPU; + ::lst::TrackCandidatesBuffer* trackCandidatesBuffers; + ::lst::PixelTriplets* pixelTripletsInGPU; + ::lst::PixelTripletsBuffer* pixelTripletsBuffers; + ::lst::PixelQuintuplets* pixelQuintupletsInGPU; + ::lst::PixelQuintupletsBuffer* pixelQuintupletsBuffers; + + //CPU interface stuff + ::lst::ObjectRangesBuffer* rangesInCPU; + 
::lst::HitsBuffer* hitsInCPU; + ::lst::MiniDoubletsBuffer* mdsInCPU; + ::lst::SegmentsBuffer* segmentsInCPU; + ::lst::TripletsBuffer* tripletsInCPU; + ::lst::TrackCandidatesBuffer* trackCandidatesInCPU; + ::lst::ModulesBuffer* modulesInCPU; + ::lst::QuintupletsBuffer* quintupletsInCPU; + ::lst::PixelTripletsBuffer* pixelTripletsInCPU; + ::lst::PixelQuintupletsBuffer* pixelQuintupletsInCPU; + + void initSync(bool verbose); + + int* superbinCPU; + int8_t* pixelTypeCPU; + + const uint16_t nModules_; + const uint16_t nLowerModules_; + const unsigned int nPixels_; + const unsigned int nEndCapMap_; + ::lst::ModulesBuffer const& modulesBuffers_; + ::lst::PixelMap const& pixelMapping_; + ::lst::EndcapGeometryBuffer const& endcapGeometryBuffers_; + + public: + // Constructor used for CMSSW integration. Uses an external queue. + Event(bool verbose, Queue const& q, const ::lst::LSTESData* deviceESData) + : queue(q), + devAcc(alpaka::getDev(q)), + devHost(cms::alpakatools::host()), + nModules_(deviceESData->nModules), + nLowerModules_(deviceESData->nLowerModules), + nPixels_(deviceESData->nPixels), + nEndCapMap_(deviceESData->nEndCapMap), + modulesBuffers_(deviceESData->modulesBuffers), + pixelMapping_(*deviceESData->pixelMapping), + endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { + initSync(verbose); + } + void resetEventSync(); // synchronizes + void wait() const { alpaka::wait(queue); } + + // Calls the appropriate hit function, then increments the counter + void addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple); + void addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + 
std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad); + + void createMiniDoublets(); + void createSegmentsWithModuleMap(); + void createTriplets(); + void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); + void createPixelTriplets(); + void createQuintuplets(); + void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); + + unsigned int getNumberOfHits(); + unsigned int getNumberOfHitsByLayer(unsigned int layer); + unsigned int getNumberOfHitsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfHitsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfMiniDoublets(); + unsigned int getNumberOfMiniDoubletsByLayer(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfSegments(); + unsigned int getNumberOfSegmentsByLayer(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfTriplets(); + unsigned int getNumberOfTripletsByLayer(unsigned int layer); + unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); + + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); + + unsigned int getNumberOfQuintuplets(); + unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); + unsigned 
int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); + + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); + + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + ::lst::HitsBuffer* getHits(bool sync = true); + ::lst::HitsBuffer* getHitsInCMSSW(bool sync = true); + ::lst::ObjectRangesBuffer* getRanges(bool sync = true); + ::lst::MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + ::lst::SegmentsBuffer* getSegments(bool sync = true); + ::lst::TripletsBuffer* getTriplets(bool sync = true); + ::lst::QuintupletsBuffer* getQuintuplets(bool sync = true); + ::lst::PixelTripletsBuffer* getPixelTriplets(bool sync = true); + ::lst::PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + ::lst::TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + ::lst::TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ::lst::ModulesBuffer* getModules(bool isFull = false, bool sync = true); + }; + + } // namespace lst + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index f5ee7d7f52add..e3e9909045a6d 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -19,26 +19,25 @@ namespace { } } // namespace -template <> -void lst::LST::prepareInput(std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& 
see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z) { unsigned int count = 0; auto n_see = see_stateTrajGlbPx.size(); std::vector px_vec; @@ -212,25 +211,24 @@ void lst::LST::prepareInput(std::vector const& see_px, in_isQuad_vec_ = isQuad_vec; } -template <> -std::vector lst::LST::getHitIdxs(short trackCandidateType, - unsigned int TCIdx, - unsigned int const* TCHitIndices, - unsigned int const* hitIndices) { +std::vector ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getHitIdxs(short trackCandidateType, + unsigned int TCIdx, + unsigned int const* TCHitIndices, + unsigned int const* hitIndices) { std::vector hits; unsigned int maxNHits = 0; if (trackCandidateType == 7) - maxNHits = Params_pT5::kHits; // pT5 + maxNHits = ::lst::Params_pT5::kHits; // pT5 else if (trackCandidateType == 5) - maxNHits = Params_pT3::kHits; // pT3 + maxNHits = ::lst::Params_pT3::kHits; // pT3 else if (trackCandidateType == 4) - maxNHits = Params_T5::kHits; // T5 + maxNHits = ::lst::Params_T5::kHits; // T5 else if (trackCandidateType == 8) - maxNHits = 
Params_pLS::kHits; // pLS + maxNHits = ::lst::Params_pLS::kHits; // pLS for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; + unsigned int hitIdxInGPU = TCHitIndices[::lst::Params_pT5::kHits * TCIdx + i]; unsigned int hitIdx = (trackCandidateType == 8) ? hitIdxInGPU @@ -248,15 +246,14 @@ std::vector lst::LST::getHitIdxs(short trackCandidateType, return hits; } -template <> -void lst::LST::getOutput(lst::Event& event) { +void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getOutput(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event& event) { std::vector> tc_hitIdxs; std::vector tc_len; std::vector tc_seedIdx; std::vector tc_trackCandidateType; - lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line - lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); + ::lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line + ::lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; @@ -277,33 +274,31 @@ void lst::LST::getOutput(lst::Event& event) { out_tc_trackCandidateType_ = tc_trackCandidateType; } -template <> -template <> -void lst::LST::run(Queue& queue, - bool verbose, - LSTESData const* deviceESData, - std::vector const& see_px, - std::vector const& see_py, - std::vector const& see_pz, - std::vector const& see_dxy, - std::vector const& see_dz, - std::vector const& see_ptErr, - std::vector const& see_etaErr, - std::vector const& see_stateTrajGlbX, - std::vector const& see_stateTrajGlbY, - std::vector const& see_stateTrajGlbZ, - std::vector const& see_stateTrajGlbPx, - std::vector const& see_stateTrajGlbPy, - std::vector const& see_stateTrajGlbPz, - std::vector const& see_q, - std::vector> const& see_hitIdx, - std::vector const& ph2_detId, - std::vector const& ph2_x, - std::vector const& ph2_y, - std::vector const& ph2_z, - bool 
no_pls_dupclean, - bool tc_pls_triplets) { - auto event = lst::Event(verbose, queue, deviceESData); +void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::run(Queue& queue, + bool verbose, + ::lst::LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets) { + auto event = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event(verbose, queue, deviceESData); prepareInput(see_px, see_py, see_pz, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 60e1a7428edf5..47b46d1b749dc 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -970,7 +970,7 @@ namespace lst { struct lst::Modules modulesInGPU, struct lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -1068,7 +1068,7 @@ namespace lst { struct lst::ObjectRanges rangesInGPU, struct lst::Hits hitsInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = 
alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 4d957a0a1402e..09290d7ba025f 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -2670,7 +2670,7 @@ namespace lst { lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -2774,7 +2774,7 @@ namespace lst { lst::Quintuplets quintupletsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6b44ddfbe24b7..7f47ff5d9a87b 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -802,7 +802,7 @@ namespace lst { lst::ObjectRanges rangesInGPU, lst::MiniDoublets mdsInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -906,7 +906,7 @@ namespace lst { lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h 
b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 0439050e100d2..99faae02c286e 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -390,7 +390,7 @@ namespace lst { lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -539,7 +539,7 @@ namespace lst { lst::Segments segmentsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index c5ac432ebf310..b20cf2ebea660 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -932,7 +932,7 @@ namespace lst { lst::ObjectRanges rangesInGPU, lst::Segments segmentsInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); @@ -1035,7 +1035,7 @@ namespace lst { lst::Triplets tripletsInGPU, lst::ObjectRanges rangesInGPU) const { // implementation is 1D with a single block - static_assert(std::is_same_v, "Should be Acc1D"); + static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); auto const globalThreadIdx = alpaka::getIdx(acc); diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc index 89bb43a3bcd4b..060308e4dabab 100644 
--- a/RecoTracker/LSTCore/standalone/bin/lst.cc +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -2,8 +2,6 @@ #include -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - //___________________________________________________________________________________________________________________________________________________________________________________________ int main(int argc, char **argv) { //******************************************************************************** @@ -255,7 +253,7 @@ int main(int argc, char **argv) { // Printing out the option settings overview std::cout << "=========================================================" << std::endl; - std::cout << " Running for Acc = " << alpaka::getAccName() << std::endl; + std::cout << " Running for Acc = " << alpaka::getAccName() << std::endl; std::cout << " Setting of the analysis job based on provided arguments " << std::endl; std::cout << "---------------------------------------------------------" << std::endl; std::cout << " ana.input_file_list_tstring: " << ana.input_file_list_tstring << std::endl; @@ -298,17 +296,18 @@ int main(int argc, char **argv) { //___________________________________________________________________________________________________________________________________________________________________________________________ void run_lst() { - Device devAcc = alpaka::getDevByIdx(ALPAKA_ACCELERATOR_NAMESPACE::Platform{}, 0u); - std::vector queues; + ALPAKA_ACCELERATOR_NAMESPACE::Device devAcc = alpaka::getDevByIdx(ALPAKA_ACCELERATOR_NAMESPACE::Platform{}, 0u); + std::vector queues; for (int s = 0; s < ana.streams; s++) { - queues.push_back(Queue(devAcc)); + queues.push_back(ALPAKA_ACCELERATOR_NAMESPACE::Queue(devAcc)); } // Load various maps used in the lst reconstruction TStopwatch full_timer; full_timer.Start(); auto hostESData = lst::loadAndFillESHost(); - auto deviceESData = cms::alpakatools::CopyToDevice>::copyAsync(queues[0], *hostESData.get()); + auto deviceESData = + 
cms::alpakatools::CopyToDevice>::copyAsync(queues[0], *hostESData.get()); float timeForMapLoading = full_timer.RealTime() * 1000; if (ana.do_write_ntuple) { @@ -384,9 +383,10 @@ void run_lst() { full_timer.Reset(); full_timer.Start(); - std::vector *> events; + std::vector events; for (int s = 0; s < ana.streams; s++) { - lst::Event *event = new lst::Event(ana.verbose >= 2, queues[s], &deviceESData); + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event = + new ALPAKA_ACCELERATOR_NAMESPACE::lst::Event(ana.verbose >= 2, queues[s], &deviceESData); events.push_back(event); } float timeForEventCreation = full_timer.RealTime() * 1000; diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index 76cfa9760b71a..bf513865ffbed 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -1,14 +1,12 @@ #include "AccessHelper.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - // =============== // ----* Hit *---- // =============== //____________________________________________________________________________________________ std::tuple, std::vector> convertHitsToHitIdxsAndHitTypes( - lst::Event* event, std::vector hits) { + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, std::vector hits) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hitidxs; std::vector hittypes; @@ -27,7 +25,7 @@ std::tuple, std::vector> convertHitsToHi // =============== //____________________________________________________________________________________________ -std::vector getPixelHitsFrompLS(lst::Event* event, unsigned int pLS) { +std::vector getPixelHitsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); @@ -46,7 
+44,7 @@ std::vector getPixelHitsFrompLS(lst::Event* event, unsigned } //____________________________________________________________________________________________ -std::vector getPixelHitIdxsFrompLS(lst::Event* event, unsigned int pLS) { +std::vector getPixelHitIdxsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hitidxs; @@ -56,15 +54,15 @@ std::vector getPixelHitIdxsFrompLS(lst::Event* event, unsig } //____________________________________________________________________________________________ -std::vector getPixelHitTypesFrompLS(lst::Event* event, unsigned int pLS) { +std::vector getPixelHitTypesFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hittypes(hits.size(), 0); return hittypes; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS(lst::Event* event, - unsigned pLS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pLS) { return convertHitsToHitIdxsAndHitTypes(event, getPixelHitsFrompLS(event, pLS)); } @@ -73,7 +71,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getHitsFromMD(lst::Event* event, unsigned int MD) { +std::vector getHitsFromMD(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); unsigned int hit_1 = miniDoublets->anchorHitIndices[MD]; unsigned int hit_2 = miniDoublets->outerHitIndices[MD]; @@ -81,8 +79,8 @@ std::vector getHitsFromMD(lst::Event* event, unsigned int M } 
//____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromMD(lst::Event* event, - unsigned MD) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromMD( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned MD) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromMD(event, MD)); } @@ -91,7 +89,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getMDsFromLS(lst::Event* event, unsigned int LS) { +std::vector getMDsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { lst::Segments const* segments = event->getSegments()->data(); unsigned int MD_1 = segments->mdIndices[2 * LS]; unsigned int MD_2 = segments->mdIndices[2 * LS + 1]; @@ -99,7 +97,7 @@ std::vector getMDsFromLS(lst::Event* event, unsigned int LS } //____________________________________________________________________________________________ -std::vector getHitsFromLS(lst::Event* event, unsigned int LS) { +std::vector getHitsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { std::vector MDs = getMDsFromLS(event, LS); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -107,8 +105,8 @@ std::vector getHitsFromLS(lst::Event* event, unsigned int L } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromLS(lst::Event* event, - unsigned LS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromLS( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned LS) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromLS(event, LS)); } @@ -117,7 +115,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ 
-std::vector getLSsFromT3(lst::Event* event, unsigned int T3) { +std::vector getLSsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { lst::Triplets const* triplets = event->getTriplets()->data(); unsigned int LS_1 = triplets->segmentIndices[2 * T3]; unsigned int LS_2 = triplets->segmentIndices[2 * T3 + 1]; @@ -125,7 +123,7 @@ std::vector getLSsFromT3(lst::Event* event, unsigned int T3 } //____________________________________________________________________________________________ -std::vector getMDsFromT3(lst::Event* event, unsigned int T3) { +std::vector getMDsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { std::vector LSs = getLSsFromT3(event, T3); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -133,7 +131,7 @@ std::vector getMDsFromT3(lst::Event* event, unsigned int T3 } //____________________________________________________________________________________________ -std::vector getHitsFromT3(lst::Event* event, unsigned int T3) { +std::vector getHitsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { std::vector MDs = getMDsFromT3(event, T3); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -142,8 +140,8 @@ std::vector getHitsFromT3(lst::Event* event, unsigned int T } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT3(lst::Event* event, - unsigned T3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT3( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT3(event, T3)); } @@ -152,7 +150,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getT3sFromT5(lst::Event* event, unsigned int T5) { 
+std::vector getT3sFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T3_1 = quintuplets->tripletIndices[2 * T5]; unsigned int T3_2 = quintuplets->tripletIndices[2 * T5 + 1]; @@ -160,7 +158,7 @@ std::vector getT3sFromT5(lst::Event* event, unsigned int T5 } //____________________________________________________________________________________________ -std::vector getLSsFromT5(lst::Event* event, unsigned int T5) { +std::vector getLSsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector T3s = getT3sFromT5(event, T5); std::vector LSs_0 = getLSsFromT3(event, T3s[0]); std::vector LSs_1 = getLSsFromT3(event, T3s[1]); @@ -168,7 +166,7 @@ std::vector getLSsFromT5(lst::Event* event, unsigned int T5 } //____________________________________________________________________________________________ -std::vector getMDsFromT5(lst::Event* event, unsigned int T5) { +std::vector getMDsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector LSs = getLSsFromT5(event, T5); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -178,7 +176,7 @@ std::vector getMDsFromT5(lst::Event* event, unsigned int T5 } //____________________________________________________________________________________________ -std::vector getHitsFromT5(lst::Event* event, unsigned int T5) { +std::vector getHitsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector MDs = getMDsFromT5(event, T5); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -189,7 +187,7 @@ std::vector getHitsFromT5(lst::Event* event, unsigned int T } //____________________________________________________________________________________________ -std::vector getHitIdxsFromT5(lst::Event* event, unsigned int T5) { +std::vector 
getHitIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFromT5(event, T5); std::vector hitidxs; @@ -198,7 +196,7 @@ std::vector getHitIdxsFromT5(lst::Event* event, unsigned in return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFromT5(lst::Event* event, unsigned int T5) { +std::vector getModuleIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { std::vector hits = getHitsFromT5(event, T5); std::vector module_idxs; lst::Hits const* hitsEvt = event->getHits()->data(); @@ -208,14 +206,14 @@ std::vector getModuleIdxsFromT5(lst::Event* event, unsigned return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFromT5(lst::Event* event, unsigned int T5) { +std::vector getHitTypesFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { return {4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; ; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT5(lst::Event* event, - unsigned T5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT5(event, T5)); } @@ -224,7 +222,7 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT3(lst::Event* event, unsigned int pT3) { +unsigned int getPixelLSFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); lst::Modules const* 
modulesEvt = event->getModules()->data(); @@ -233,37 +231,38 @@ unsigned int getPixelLSFrompT3(lst::Event* event, unsigned int pT3) { } //____________________________________________________________________________________________ -unsigned int getT3FrompT3(lst::Event* event, unsigned int pT3) { +unsigned int getT3FrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); return pixelTriplets->tripletIndices[pT3]; } //____________________________________________________________________________________________ -std::vector getLSsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getLSsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getLSsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getMDsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getMDsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getMDsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getOuterTrackerHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, + unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getHitsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getPixelHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); return getPixelHitsFrompLS(event, pLS); } 
//____________________________________________________________________________________________ -std::vector getHitsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -273,7 +272,7 @@ std::vector getHitsFrompT3(lst::Event* event, unsigned int } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getHitIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { lst::Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT3(event, pT3); std::vector hitidxs; @@ -282,7 +281,7 @@ std::vector getHitIdxsFrompT3(lst::Event* event, unsigned i return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getModuleIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { std::vector hits = getOuterTrackerHitsFrompT3(event, pT3); std::vector module_idxs; lst::Hits const* hitsEvt = event->getHits()->data(); @@ -292,7 +291,7 @@ std::vector getModuleIdxsFrompT3(lst::Event* event, unsigne return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT3(lst::Event* event, unsigned int pT3) { +std::vector getHitTypesFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -303,8 +302,8 @@ std::vector 
getHitTypesFrompT3(lst::Event* event, unsigned } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3(lst::Event* event, - unsigned pT3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT3(event, pT3)); } @@ -313,7 +312,7 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT5(lst::Event* event, unsigned int pT5) { +unsigned int getPixelLSFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); lst::Modules const* modulesEvt = event->getModules()->data(); @@ -322,43 +321,44 @@ unsigned int getPixelLSFrompT5(lst::Event* event, unsigned int pT5) { } //____________________________________________________________________________________________ -unsigned int getT5FrompT5(lst::Event* event, unsigned int pT5) { +unsigned int getT5FrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); return pixelQuintuplets->T5Indices[pT5]; } //____________________________________________________________________________________________ -std::vector getT3sFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getT3sFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getT3sFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getLSsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector 
getLSsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getLSsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getMDsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getMDsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getMDsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getOuterTrackerHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, + unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getHitsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getPixelHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5 = getT5FrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -368,7 +368,7 @@ std::vector getHitsFrompT5(lst::Event* event, unsigned int } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getHitIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { lst::Hits const* 
hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT5(event, pT5); std::vector hitidxs; @@ -378,7 +378,7 @@ std::vector getHitIdxsFrompT5(lst::Event* event, unsigned i } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getModuleIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { std::vector hits = getOuterTrackerHitsFrompT5(event, pT5); std::vector module_idxs; lst::Hits const* hitsEvt = event->getHits()->data(); @@ -389,7 +389,7 @@ std::vector getModuleIdxsFrompT5(lst::Event* event, unsigne } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT5(lst::Event* event, unsigned int pT5) { +std::vector getHitTypesFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -400,8 +400,8 @@ std::vector getHitTypesFrompT5(lst::Event* event, unsigned } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5(lst::Event* event, - unsigned pT5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT5(event, pT5)); } @@ -410,7 +410,7 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(lst::Event* event, unsigned int TC) { +std::vector getLSsFromTC(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int TC) { // Get the type of the track candidate lst::TrackCandidates const* 
trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; @@ -432,8 +432,8 @@ std::vector getLSsFromTC(lst::Event* event, unsigned int TC } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(lst::Event* event, - unsigned TC) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromTC( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned TC) { // Get the type of the track candidate lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h index d0924518eeb4d..6c856b22915d4 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.h @@ -5,7 +5,7 @@ #include #include "Event.h" -using LSTEvent = lst::Event; +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; enum { kpT5 = 7, kpT3 = 5, kT5 = 4, kpLS = 8 }; diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index 9277b60253a64..73b5daabbfc1a 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -1,7 +1,5 @@ #include "trkCore.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - //___________________________________________________________________________________________________________________________________________________________________________________________ bool goodEvent() { if (ana.specific_event_index >= 0) { @@ -22,7 +20,7 @@ bool goodEvent() { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float 
runMiniDoublet(lst::Event *event, int evt) { +float runMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, int evt) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Mini-Doublet start " << evt << std::endl; @@ -75,7 +73,7 @@ float runMiniDoublet(lst::Event *event, int evt) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runSegment(lst::Event *event) { +float runSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Segment start" << std::endl; @@ -113,7 +111,7 @@ float runSegment(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runT3(lst::Event *event) { +float runT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco T3 start" << std::endl; @@ -155,7 +153,7 @@ float runT3(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runpT3(lst::Event *event) { +float runpT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Triplet pT3 start" << std::endl; @@ -172,7 +170,7 @@ float runpT3(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runQuintuplet(lst::Event *event) { +float runQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco 
Quintuplet start" << std::endl; @@ -218,7 +216,7 @@ float runQuintuplet(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { +float runPixelLineSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment start" << std::endl; @@ -233,7 +231,7 @@ float runPixelLineSegment(lst::Event *event, bool no_pls_dupclean) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelQuintuplet(lst::Event *event) { +float runPixelQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet start" << std::endl; @@ -250,7 +248,7 @@ float runPixelQuintuplet(lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runTrackCandidate(lst::Event *event, bool no_pls_dupclean, bool tc_pls_triplets) { +float runTrackCandidate(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean, bool tc_pls_triplets) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco TrackCandidate start" << std::endl; @@ -847,7 +845,7 @@ void addInputsToLineSegmentTrackingPreLoad(std::vector> &out_ } //___________________________________________________________________________________________________________________________________________________________________________________________ -float addInputsToEventPreLoad(lst::Event *event, +float 
addInputsToEventPreLoad(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool useOMP, std::vector trkX, std::vector trkY, @@ -1152,7 +1150,7 @@ void writeMetaData() { // DEPRECATED FUNCTIONS //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTracking(lst::Event &event, bool useOMP) { +[[deprecated]] float addInputsToLineSegmentTracking(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event, bool useOMP) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Loading Inputs (i.e. outer tracker hits, and pixel line segements) to the Line Segment Tracking.... " @@ -1348,6 +1346,6 @@ void writeMetaData() { } //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(lst::Event &event) { +[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event) { return addInputsToLineSegmentTracking(event, true); } diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.h b/RecoTracker/LSTCore/standalone/code/core/trkCore.h index 0a2fddaba9d5c..66d5c10baf431 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.h +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.h @@ -11,7 +11,7 @@ #include #include -using LSTEvent = lst::Event; +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; // --------------------- ======================== --------------------- diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index 33eaeefc2d796..911a34f519a6d 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -1,7 +1,5 @@ #include "write_lst_ntuple.h" -using namespace ALPAKA_ACCELERATOR_NAMESPACE; - 
//________________________________________________________________________________________________________________________________ void createOutputBranches() { createRequiredOutputBranches(); @@ -9,7 +7,7 @@ void createOutputBranches() { } //________________________________________________________________________________________________________________________________ -void fillOutputBranches(lst::Event* event) { +void fillOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { setOutputBranches(event); setOptionalOutputBranches(event); if (ana.gnn_ntuple) @@ -183,7 +181,7 @@ void createGnnNtupleBranches() { } //________________________________________________________________________________________________________________________________ -void setOutputBranches(lst::Event* event) { +void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { // ============ Sim tracks ============= int n_accepted_simtrk = 0; for (unsigned int isimtrk = 0; isimtrk < trk.sim_pt().size(); ++isimtrk) { @@ -278,7 +276,7 @@ void setOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setOptionalOutputBranches(lst::Event* event) { +void setOptionalOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { #ifdef CUT_VALUE_DEBUG setPixelQuintupletOutputBranches(event); @@ -289,7 +287,7 @@ void setOptionalOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setPixelQuintupletOutputBranches(lst::Event* event) { +void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { // ============ pT5 ============= lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); @@ -365,7 +363,7 @@ void 
setPixelQuintupletOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setQuintupletOutputBranches(lst::Event* event) { +void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); lst::ObjectRanges const* ranges = event->getRanges()->data(); lst::Modules const* modules = event->getModules()->data(); @@ -436,7 +434,7 @@ void setQuintupletOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setPixelTripletOutputBranches(lst::Event* event) { +void setPixelTripletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); lst::Modules const* modules = event->getModules()->data(); lst::Segments const* segments = event->getSegments()->data(); @@ -499,7 +497,7 @@ void setPixelTripletOutputBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setGnnNtupleBranches(lst::Event* event) { +void setGnnNtupleBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { // Get relevant information lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); @@ -640,7 +638,7 @@ void setGnnNtupleBranches(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setGnnNtupleMiniDoublet(lst::Event* event, unsigned int MD) { +void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { // Get relevant information lst::MiniDoublets const* miniDoublets 
= event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); @@ -708,8 +706,8 @@ void setGnnNtupleMiniDoublet(lst::Event* event, unsigned int MD) { } //________________________________________________________________________________________________________________________________ -std::tuple> parseTrackCandidate(lst::Event* event, - unsigned int idx) { +std::tuple> parseTrackCandidate( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { // Get the type of the track candidate lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[idx]; @@ -742,8 +740,8 @@ std::tuple> parseTrackCandidate( } //________________________________________________________________________________________________________________________________ -std::tuple, std::vector> parsepT5(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parsepT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { // Get relevant information lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); @@ -854,8 +852,8 @@ std::tuple, std::vector, std::vector> parsepT3(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parsepT3( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { // Get relevant information lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Triplets const* triplets = event->getTriplets()->data(); @@ -889,8 +887,8 @@ std::tuple, std::vector, std::vector> parseT5(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parseT5( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T5 = 
trackCandidates->directObjectIndices[idx]; @@ -923,8 +921,8 @@ std::tuple, std::vector, std::vector> parsepLS(lst::Event* event, - unsigned int idx) { +std::tuple, std::vector> parsepLS( + ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Segments const* segments = event->getSegments()->data(); @@ -944,7 +942,7 @@ std::tuple, std::vector* event) { +void printHitMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Modules const* modules = event->getModules()->data(); lst::ObjectRanges const* ranges = event->getRanges()->data(); @@ -959,7 +957,7 @@ void printHitMultiplicities(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMiniDoubletMultiplicities(lst::Event* event) { +void printMiniDoubletMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Modules const* modules = event->getModules()->data(); @@ -978,7 +976,7 @@ void printMiniDoubletMultiplicities(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printAllObjects(lst::Event* event) { +void printAllObjects(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { printMDs(event); printLSs(event); printpLSs(event); @@ -986,7 +984,7 @@ void printAllObjects(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMDs(lst::Event* event) { +void printMDs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); lst::Modules const* modules = event->getModules()->data(); 
@@ -1008,7 +1006,7 @@ void printMDs(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printLSs(lst::Event* event) { +void printLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); @@ -1040,7 +1038,7 @@ void printLSs(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printpLSs(lst::Event* event) { +void printpLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); lst::Hits const* hitsEvt = event->getHits()->data(); @@ -1070,7 +1068,7 @@ void printpLSs(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printT3s(lst::Event* event) { +void printT3s(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::Triplets const* triplets = event->getTriplets()->data(); lst::Segments const* segments = event->getSegments()->data(); lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); @@ -1112,7 +1110,7 @@ void printT3s(lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(lst::Event* event) { +void debugPrintOutlierMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); lst::Triplets const* triplets = event->getTriplets()->data(); lst::Segments const* segments = event->getSegments()->data(); 
diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h index cd20553772b9a..7a25c0d3cbcc6 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h @@ -11,7 +11,7 @@ #include "trkCore.h" #include "AccessHelper.h" -using LSTEvent = lst::Event; +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; // Common void createOutputBranches(); From 79ea879d884fba79175bb0748c5e92debf1af55a Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Sat, 17 Aug 2024 07:31:46 -0700 Subject: [PATCH 20/20] Moved all alpaka code to ALPAKA_ACCELERATOR_NAMESPACE --- .../plugins/alpaka/LSTModulesDevESProducer.cc | 2 +- RecoTracker/LST/plugins/alpaka/LSTProducer.cc | 2 +- .../LSTCore/interface/alpaka/Constants.h | 171 ++--- .../LSTCore/interface/{ => alpaka}/LST.h | 8 +- RecoTracker/LSTCore/src/alpaka/Event.dev.cc | 553 ++++++++-------- RecoTracker/LSTCore/src/alpaka/Event.h | 95 +-- RecoTracker/LSTCore/src/alpaka/Hit.h | 17 +- RecoTracker/LSTCore/src/alpaka/Kernels.h | 43 +- RecoTracker/LSTCore/src/alpaka/LST.dev.cc | 18 +- RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 302 ++++----- .../LSTCore/src/alpaka/NeuralNetwork.h | 273 ++++---- .../LSTCore/src/alpaka/NeuralNetworkWeights.h | 610 +++++++++--------- RecoTracker/LSTCore/src/alpaka/ObjectRanges.h | 4 +- .../LSTCore/src/alpaka/PixelQuintuplet.h | 246 +++---- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 347 +++++----- RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 570 ++++++++-------- RecoTracker/LSTCore/src/alpaka/Segment.h | 136 ++-- .../LSTCore/src/alpaka/TrackCandidate.h | 86 +-- RecoTracker/LSTCore/src/alpaka/Triplet.h | 179 +++-- .../standalone/code/core/AccessHelper.cc | 166 ++--- .../standalone/code/core/AnalysisConfig.h | 2 +- .../LSTCore/standalone/code/core/trkCore.cc | 22 +- .../standalone/code/core/write_lst_ntuple.cc | 183 +++--- 23 files changed, 2001 
insertions(+), 2034 deletions(-) rename RecoTracker/LSTCore/interface/{ => alpaka}/LST.h (96%) diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc index c1d815210bd53..46c99993c5ed9 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -9,7 +9,7 @@ // LST includes #include "RecoTracker/LSTCore/interface/Module.h" -#include "RecoTracker/LSTCore/interface/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc index 6365eb9822483..e92ff549dffd1 100644 --- a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -19,7 +19,7 @@ #include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" -#include "RecoTracker/LSTCore/interface/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h index 459989670ccdd..9fed7760c721a 100644 --- a/RecoTracker/LSTCore/interface/alpaka/Constants.h +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -9,99 +9,118 @@ #include #endif -namespace lst { - - using namespace alpaka_common; +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace lst { + + // Re-export some useful things from the main namespace + using ::lst::allocBufWrapper; + using ::lst::Buf; + using ::lst::max_blocks; + using ::lst::max_connected_modules; + using ::lst::n_max_nonpixel_track_candidates; + using ::lst::n_max_pixel_md_per_modules; + using ::lst::n_max_pixel_quintuplets; + using ::lst::n_max_pixel_segments_per_module; + using ::lst::n_max_pixel_track_candidates; + using ::lst::n_max_pixel_triplets; + using ::lst::Params_LS; + using 
::lst::Params_pLS; + using ::lst::Params_pT3; + using ::lst::Params_pT5; + using ::lst::Params_T3; + using ::lst::Params_T5; + using ::lst::size_superbins; // Half precision wrapper functions. #if defined(FP16_Base) #define __F2H __float2half #define __H2F __half2float - typedef __half float FPX; + typedef __half float FPX; #else #define __F2H #define __H2F - typedef float FPX; + typedef float FPX; #endif - Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); + Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); // Needed for files that are compiled by g++ to not throw an error. // uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. #if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) - struct uint4 { - unsigned int x; - unsigned int y; - unsigned int z; - unsigned int w; - }; + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; #endif - // Adjust grid and block sizes based on backend configuration - template > - ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, - const Vec& threadsPerBlock, - const Vec& elementsPerThreadArg) { - Vec adjustedBlocks = blocksPerGrid; - Vec adjustedThreads = threadsPerBlock; - - // special overrides for CPU/host cases - if constexpr (std::is_same_v) { - adjustedBlocks = Vec::all(static_cast(1)); - - if constexpr (alpaka::accMatchesTags) { - // Serial execution, set threads to 1 as well - adjustedThreads = Vec::all(static_cast(1)); // probably redundant + // Adjust grid and block sizes based on backend configuration + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { + Vec adjustedBlocks = blocksPerGrid; + Vec adjustedThreads = threadsPerBlock; + + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { + adjustedBlocks = 
Vec::all(static_cast(1)); + + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } } + + return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); } - return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); - } - - // The constants below are usually used in functions like alpaka::math::min(), - // expecting a reference (T const&) in the arguments. Hence, - // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. - - // 15 MeV constant from the approximate Bethe-Bloch formula - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { - 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { - 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { - 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. 
/ (2.99792458e-3 * 3.8); - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; - // Since C++ can't represent infinity, lst_INF = 123456789 was used to represent infinity in the data table - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float lst_INF = 123456789.0; - - namespace t5dnn { - - // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR - // Other working points - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR - ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR - - } // namespace t5dnn - -} //namespace lst + // The 
constants below are usually used in functions like alpaka::math::min(), + // expecting a reference (T const&) in the arguments. Hence, + // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. + + // 15 MeV constant from the approximate Bethe-Bloch formula + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { + 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { + 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { + 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. 
/ (2.99792458e-3 * 3.8); + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; + // Since C++ can't represent infinity, lst_INF = 123456789 was used to represent infinity in the data table + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float lst_INF = 123456789.0; + + namespace t5dnn { + + // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR + // Other working points + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR + + } // namespace t5dnn + + } //namespace lst +} 
//namespace ALPAKA_ACCELERATOR_NAMESPACE #endif diff --git a/RecoTracker/LSTCore/interface/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h similarity index 96% rename from RecoTracker/LSTCore/interface/LST.h rename to RecoTracker/LSTCore/interface/alpaka/LST.h index a83399cbd8356..0e4c64d2535df 100644 --- a/RecoTracker/LSTCore/interface/LST.h +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -1,5 +1,5 @@ -#ifndef RecoTracker_LSTCore_interface_LST_h -#define RecoTracker_LSTCore_interface_LST_h +#ifndef RecoTracker_LSTCore_interface_alpaka_LST_h +#define RecoTracker_LSTCore_interface_alpaka_LST_h #include "RecoTracker/LSTCore/interface/Constants.h" #include "RecoTracker/LSTCore/interface/LSTESData.h" @@ -8,6 +8,8 @@ #include #include +using ::lst::LSTESData; + namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace lst { class Event; @@ -18,7 +20,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void run(Queue& queue, bool verbose, - ::lst::LSTESData const* deviceESData, + LSTESData const* deviceESData, std::vector const& see_px, std::vector const& see_py, std::vector const& see_pz, diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc index 1526acd8e676d..62629bb08fc52 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -7,7 +7,9 @@ using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::initSync(bool verbose) { +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + +void Event::initSync(bool verbose) { alpaka::wait(queue); // other calls can be asynchronous addObjects = verbose; hitsInGPU = nullptr; @@ -50,7 +52,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::initSync(bool verbose) { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::resetEventSync() { +void Event::resetEventSync() { alpaka::wait(queue); // 
synchronize to reset consistently //reset the arrays for (int i = 0; i < 6; i++) { @@ -157,24 +159,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::resetEventSync() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector const& x, - std::vector const& y, - std::vector const& z, - std::vector const& detId, - std::vector const& idxInNtuple) { +void Event::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { // Use the actual number of hits instead of a max. unsigned int nHits = x.size(); // Initialize space on device/host for next event. if (hitsInGPU == nullptr) { - hitsInGPU = new ::lst::Hits(); - hitsBuffers = new ::lst::HitsBuffer(nModules_, nHits, devAcc, queue); + hitsInGPU = new Hits(); + hitsBuffers = new HitsBuffer(nModules_, nHits, devAcc, queue); hitsInGPU->setData(*hitsBuffers); } if (rangesInGPU == nullptr) { - rangesInGPU = new ::lst::ObjectRanges(); - rangesBuffers = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); + rangesInGPU = new ObjectRanges(); + rangesBuffers = new ObjectRangesBuffer(nModules_, nLowerModules_, devAcc, queue); rangesInGPU->setData(*rangesBuffers); } @@ -191,12 +193,12 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock1{1, 1, 256}; - Vec3D const blocksPerGrid1{1, 1, ::lst::max_blocks}; - WorkDiv3D const hit_loop_workdiv = ::lst::createWorkDiv(blocksPerGrid1, threadsPerBlock1, ::lst::elementsPerThread); + Vec3D const blocksPerGrid1{1, 1, max_blocks}; + WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); alpaka::exec(queue, hit_loop_workdiv, - ::lst::HitLoopKernel{}, + HitLoopKernel{}, ::lst::Endcap, ::lst::TwoS, nModules_, @@ -208,41 +210,40 @@ void 
ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addHitToEvent(std::vector nHits); Vec3D const threadsPerBlock2{1, 1, 256}; - Vec3D const blocksPerGrid2{1, 1, ::lst::max_blocks}; - WorkDiv3D const module_ranges_workdiv = - ::lst::createWorkDiv(blocksPerGrid2, threadsPerBlock2, ::lst::elementsPerThread); + Vec3D const blocksPerGrid2{1, 1, max_blocks}; + WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); alpaka::exec( - queue, module_ranges_workdiv, ::lst::ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); + queue, module_ranges_workdiv, ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU, nLowerModules_); } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vector const& hitIndices0, - std::vector const& hitIndices1, - std::vector const& hitIndices2, - std::vector const& hitIndices3, - std::vector const& dPhiChange, - std::vector const& ptIn, - std::vector const& ptErr, - std::vector const& px, - std::vector const& py, - std::vector const& pz, - std::vector const& eta, - std::vector const& etaErr, - std::vector const& phi, - std::vector const& charge, - std::vector const& seedIdx, - std::vector const& superbin, - std::vector const& pixelType, - std::vector const& isQuad) { +void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad) { unsigned int size = ptIn.size(); - if (size > ::lst::n_max_pixel_segments_per_module) { + if (size > n_max_pixel_segments_per_module) { 
printf( "*********************************************************\n" "* Warning: Pixel line segments will be truncated. *\n" "* You need to increase n_max_pixel_segments_per_module. *\n" "*********************************************************\n"); - size = ::lst::n_max_pixel_segments_per_module; + size = n_max_pixel_segments_per_module; } unsigned int mdSize = 2 * size; @@ -255,24 +256,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, ::lst::CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - mdsInGPU = new ::lst::MiniDoublets(); - miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new MiniDoublets(); + miniDoubletsBuffers = new MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, 
nTotalMDs_buf_h); @@ -281,11 +282,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them - WorkDiv1D const createSegmentArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, createSegmentArrayRanges_workDiv, - ::lst::CreateSegmentArrayRanges{}, + CreateSegmentArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU); @@ -295,21 +296,21 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto alpaka::memcpy(queue, nTotalSegments_view, rangesBuffers->device_nTotalSegs_buf); alpaka::wait(queue); // wait to get the value before manipulation - nTotalSegments_ += ::lst::n_max_pixel_segments_per_module; + nTotalSegments_ += n_max_pixel_segments_per_module; - segmentsInGPU = new ::lst::Segments(); - segmentsBuffers = new ::lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new Segments(); + segmentsBuffers = + new SegmentsBuffer(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); } - auto hitIndices0_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto hitIndices1_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto hitIndices2_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto hitIndices3_dev = ::lst::allocBufWrapper(devAcc, size, queue); - auto dPhiChange_dev = ::lst::allocBufWrapper(devAcc, size, queue); + auto hitIndices0_dev = allocBufWrapper(devAcc, size, 
queue); + auto hitIndices1_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices2_dev = allocBufWrapper(devAcc, size, queue); + auto hitIndices3_dev = allocBufWrapper(devAcc, size, queue); + auto dPhiChange_dev = allocBufWrapper(devAcc, size, queue); alpaka::memcpy(queue, hitIndices0_dev, hitIndices0, size); alpaka::memcpy(queue, hitIndices1_dev, hitIndices1, size); @@ -352,13 +353,12 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto alpaka::wait(queue); // FIXME: remove synch after inputs refactored to be in pinned memory Vec3D const threadsPerBlock{1, 1, 256}; - Vec3D const blocksPerGrid{1, 1, ::lst::max_blocks}; - WorkDiv3D const addPixelSegmentToEvent_workdiv = - ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); + Vec3D const blocksPerGrid{1, 1, max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); alpaka::exec(queue, addPixelSegmentToEvent_workdiv, - ::lst::AddPixelSegmentToEventKernel{}, + AddPixelSegmentToEventKernel{}, *modulesBuffers_.data(), *rangesInGPU, *hitsInGPU, @@ -373,53 +373,53 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addPixelSegmentToEvent(std::vecto size); } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createMiniDoublets() { +void Event::createMiniDoublets() { // Create a view for the element nLowerModules_ inside rangesBuffers->miniDoubletModuleOccupancy auto dst_view_miniDoubletModuleOccupancy = alpaka::createSubView(rangesBuffers->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); // Create a host buffer for a value to be passed to the device auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); - *pixelMaxMDs_buf_h.data() = ::lst::n_max_pixel_md_per_modules; + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; alpaka::memcpy(queue, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); - WorkDiv1D const createMDArrayRangesGPU_workDiv = 
::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec( - queue, createMDArrayRangesGPU_workDiv, ::lst::CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); + queue, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU); auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue, (Idx)1u); alpaka::memcpy(queue, nTotalMDs_buf_h, rangesBuffers->device_nTotalMDs_buf); alpaka::wait(queue); // wait to get the data before manipulation - *nTotalMDs_buf_h.data() += ::lst::n_max_pixel_md_per_modules; + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); if (mdsInGPU == nullptr) { - mdsInGPU = new ::lst::MiniDoublets(); - miniDoubletsBuffers = new ::lst::MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); + mdsInGPU = new MiniDoublets(); + miniDoubletsBuffers = new MiniDoubletsBuffer(nTotalMDs, nLowerModules_, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); } Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; Vec3D const blocksPerGridCreateMDInGPU{1, nLowerModules_ / threadsPerBlockCreateMDInGPU[1], 1}; WorkDiv3D const createMiniDoubletsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); alpaka::exec(queue, createMiniDoubletsInGPUv2_workDiv, - ::lst::CreateMiniDoubletsInGPUv2{}, + CreateMiniDoubletsInGPUv2{}, *modulesBuffers_.data(), *hitsInGPU, *mdsInGPU, *rangesInGPU); - WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addMiniDoubletRangesToEventExplicit_workDiv, - ::lst::AddMiniDoubletRangesToEventExplicit{}, + 
AddMiniDoubletRangesToEventExplicit{}, *modulesBuffers_.data(), *mdsInGPU, *rangesInGPU, @@ -430,32 +430,32 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createMiniDoublets() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createSegmentsWithModuleMap() { +void Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { - segmentsInGPU = new ::lst::Segments(); - segmentsBuffers = new ::lst::SegmentsBuffer( - nTotalSegments_, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devAcc, queue); + segmentsInGPU = new Segments(); + segmentsBuffers = + new SegmentsBuffer(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; WorkDiv3D const createSegmentsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); alpaka::exec(queue, createSegmentsInGPUv2_workDiv, - ::lst::CreateSegmentsInGPUv2{}, + CreateSegmentsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, *rangesInGPU); - WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addSegmentRangesToEventExplicit_workDiv, - ::lst::AddSegmentRangesToEventExplicit{}, + AddSegmentRangesToEventExplicit{}, *modulesBuffers_.data(), *segmentsInGPU, *rangesInGPU); @@ -465,13 +465,13 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createSegmentsWithModuleMap() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { +void Event::createTriplets() { if (tripletsInGPU == nullptr) { - WorkDiv1D const createTripletArrayRanges_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const 
createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, createTripletArrayRanges_workDiv, - ::lst::CreateTripletArrayRanges{}, + CreateTripletArrayRanges{}, *modulesBuffers_.data(), *rangesInGPU, *segmentsInGPU); @@ -482,8 +482,8 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { alpaka::memcpy(queue, maxTriplets_buf_h, rangesBuffers->device_nTotalTrips_buf); alpaka::wait(queue); // wait to get the value before using it - tripletsInGPU = new ::lst::Triplets(); - tripletsBuffers = new ::lst::TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); + tripletsInGPU = new Triplets(); + tripletsBuffers = new TripletsBuffer(*maxTriplets_buf_h.data(), nLowerModules_, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf_h); @@ -521,17 +521,17 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { } // Allocate and copy to device index - auto index_gpu_buf = ::lst::allocBufWrapper(devAcc, nLowerModules_, queue); + auto index_gpu_buf = allocBufWrapper(devAcc, nLowerModules_, queue); alpaka::memcpy(queue, index_gpu_buf, index_buf_h, nonZeroModules); Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; - Vec3D const blocksPerGridCreateTrip{::lst::max_blocks, 1, 1}; + Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; WorkDiv3D const createTripletsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); alpaka::exec(queue, createTripletsInGPUv2_workDiv, - ::lst::CreateTripletsInGPUv2{}, + CreateTripletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -540,11 +540,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { index_gpu_buf.data(), nonZeroModules); - WorkDiv1D const addTripletRangesToEventExplicit_workDiv = 
::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addTripletRangesToEventExplicit_workDiv, - ::lst::AddTripletRangesToEventExplicit{}, + AddTripletRangesToEventExplicit{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); @@ -554,33 +554,33 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTriplets() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { +void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new ::lst::TrackCandidates(); - trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new TrackCandidates(); + trackCandidatesBuffers = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; WorkDiv3D const crossCleanpT3_workDiv = - ::lst::createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); alpaka::exec(queue, crossCleanpT3_workDiv, - ::lst::CrossCleanpT3{}, + CrossCleanpT3{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, *segmentsInGPU, *pixelQuintupletsInGPU); - WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = ::lst::createWorkDiv({1}, {512}, {1}); + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); alpaka::exec(queue, addpT3asTrackCandidatesInGPU_workDiv, - ::lst::AddpT3asTrackCandidatesInGPU{}, + AddpT3asTrackCandidatesInGPU{}, nLowerModules_, 
*pixelTripletsInGPU, *trackCandidatesInGPU, @@ -596,22 +596,22 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = - ::lst::createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); alpaka::exec(queue, removeDupQuintupletsInGPUBeforeTC_workDiv, - ::lst::RemoveDupQuintupletsInGPUBeforeTC{}, + RemoveDupQuintupletsInGPUBeforeTC{}, *quintupletsInGPU, *rangesInGPU); Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; - Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, ::lst::max_blocks}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; WorkDiv3D const crossCleanT5_workDiv = - ::lst::createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); alpaka::exec(queue, crossCleanT5_workDiv, - ::lst::CrossCleanT5{}, + CrossCleanT5{}, *modulesBuffers_.data(), *quintupletsInGPU, *pixelQuintupletsInGPU, @@ -620,12 +620,12 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; - WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( - blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, ::lst::elementsPerThread); + WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( + blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); alpaka::exec(queue, 
addT5asTrackCandidateInGPU_workDiv, - ::lst::AddT5asTrackCandidateInGPU{}, + AddT5asTrackCandidateInGPU{}, nLowerModules_, *quintupletsInGPU, *trackCandidatesInGPU, @@ -633,22 +633,21 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec( - queue, checkHitspLS_workDiv, ::lst::CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); + alpaka::exec(queue, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, true); } Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; WorkDiv3D const crossCleanpLS_workDiv = - ::lst::createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); alpaka::exec(queue, crossCleanpLS_workDiv, - ::lst::CrossCleanpLS{}, + CrossCleanpLS{}, *modulesBuffers_.data(), *rangesInGPU, *pixelTripletsInGPU, @@ -659,23 +658,23 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls *quintupletsInGPU); Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; - Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, ::lst::max_blocks}; - WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = ::lst::createWorkDiv( - blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, ::lst::elementsPerThread); + Vec3D const 
blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; + WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( + blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); alpaka::exec(queue, addpLSasTrackCandidateInGPU_workDiv, - ::lst::AddpLSasTrackCandidateInGPU{}, + AddpLSasTrackCandidateInGPU{}, nLowerModules_, *trackCandidatesInGPU, *segmentsInGPU, tc_pls_triplets); // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached - auto nTrackCanpT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTrackCanpT3Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTrackCanpLSHost_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTrackCanT5Host_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT5Host_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanpT3Host_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanpLSHost_buf = allocBufWrapper(devHost, 1, queue); + auto nTrackCanT5Host_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nTrackCanpT5Host_buf, trackCandidatesBuffers->nTrackCandidatespT5_buf); alpaka::memcpy(queue, nTrackCanpT3Host_buf, trackCandidatesBuffers->nTrackCandidatespT3_buf); alpaka::memcpy(queue, nTrackCanpLSHost_buf, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -686,27 +685,26 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createTrackCandidates(bool no_pls auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); - if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == ::lst::n_max_pixel_track_candidates) || - (nTrackCandidatesT5 == ::lst::n_max_nonpixel_track_candidates)) { + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { 
printf( "****************************************************************************************************\n" "* Warning: Track candidates were possibly truncated. *\n" - "* You may need to increase either ::lst::n_max_pixel_track_candidates or " - "::lst::n_max_nonpixel_track_candidates. *\n" + "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" "* Run the code with the WARNINGS flag activated for more details. *\n" "****************************************************************************************************\n"); } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { +void Event::createPixelTriplets() { if (pixelTripletsInGPU == nullptr) { - pixelTripletsInGPU = new ::lst::PixelTriplets(); - pixelTripletsBuffers = new ::lst::PixelTripletsBuffer(::lst::n_max_pixel_triplets, devAcc, queue); + pixelTripletsInGPU = new PixelTriplets(); + pixelTripletsBuffers = new PixelTripletsBuffer(n_max_pixel_triplets, devAcc, queue); pixelTripletsInGPU->setData(*pixelTripletsBuffers); } - auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -722,25 +720,25 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = 
::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel // the current selection still leaves a significant fraction of unmatchable pLSs for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelSize_host[i] = 0; 
connectedPixelIndex_host[i] = 0; continue; @@ -772,11 +770,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { Vec3D const threadsPerBlock{1, 4, 32}; Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = - ::lst::createWorkDiv(blocksPerGrid, threadsPerBlock, ::lst::elementsPerThread); + createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); alpaka::exec(queue, createPixelTripletsInGPUFromMapv2_workDiv, - ::lst::CreatePixelTripletsInGPUFromMapv2{}, + CreatePixelTripletsInGPUFromMapv2{}, *modulesBuffers_.data(), *rangesInGPU, *mdsInGPU, @@ -788,7 +786,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { nInnerSegments); #ifdef WARNINGS - auto nPixelTriplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nPixelTriplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelTriplets_buf, pixelTripletsBuffers->nPixelTriplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -801,26 +799,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelTriplets() { //seems like more blocks lead to conflicting writes Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv = - ::lst::createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); - alpaka::exec(queue, - removeDupPixelTripletsInGPUFromMap_workDiv, - ::lst::RemoveDupPixelTripletsInGPUFromMap{}, - *pixelTripletsInGPU); + alpaka::exec( + queue, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU); } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { - WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); +void Event::createQuintuplets() { + 
WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, createEligibleModulesListForQuintupletsGPU_workDiv, - ::lst::CreateEligibleModulesListForQuintupletsGPU{}, + CreateEligibleModulesListForQuintupletsGPU{}, *modulesBuffers_.data(), *tripletsInGPU, *rangesInGPU); - auto nEligibleT5Modules_buf = ::lst::allocBufWrapper(devHost, 1, queue); - auto nTotalQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nEligibleT5Modules_buf = allocBufWrapper(devHost, 1, queue); + auto nTotalQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nEligibleT5Modules_buf, rangesBuffers->nEligibleT5Modules_buf); alpaka::memcpy(queue, nTotalQuintuplets_buf, rangesBuffers->device_nTotalQuints_buf); @@ -830,8 +826,8 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (quintupletsInGPU == nullptr) { - quintupletsInGPU = new ::lst::Quintuplets(); - quintupletsBuffers = new ::lst::QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); + quintupletsInGPU = new Quintuplets(); + quintupletsBuffers = new QuintupletsBuffer(nTotalQuintuplets, nLowerModules_, devAcc, queue); quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf); @@ -840,11 +836,11 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { Vec3D const threadsPerBlockQuints{1, 8, 32}; Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; WorkDiv3D const createQuintupletsInGPUv2_workDiv = - ::lst::createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); alpaka::exec(queue, createQuintupletsInGPUv2_workDiv, - ::lst::CreateQuintupletsInGPUv2{}, + CreateQuintupletsInGPUv2{}, *modulesBuffers_.data(), *mdsInGPU, 
*segmentsInGPU, @@ -854,22 +850,22 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { nEligibleT5Modules); Vec3D const threadsPerBlockDupQuint{1, 16, 16}; - Vec3D const blocksPerGridDupQuint{::lst::max_blocks, 1, 1}; + Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv = - ::lst::createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); alpaka::exec(queue, removeDupQuintupletsInGPUAfterBuild_workDiv, - ::lst::RemoveDupQuintupletsInGPUAfterBuild{}, + RemoveDupQuintupletsInGPUAfterBuild{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); - WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = ::lst::createWorkDiv({1}, {1024}, {1}); + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); alpaka::exec(queue, addQuintupletRangesToEventExplicit_workDiv, - ::lst::AddQuintupletRangesToEventExplicit{}, + AddQuintupletRangesToEventExplicit{}, *modulesBuffers_.data(), *quintupletsInGPU, *rangesInGPU); @@ -879,33 +875,32 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createQuintuplets() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { +void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) { if (!no_pls_dupclean) { Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; - Vec3D const blocksPerGridCheckHitspLS{1, ::lst::max_blocks * 4, ::lst::max_blocks / 4}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; WorkDiv3D const checkHitspLS_workDiv = - ::lst::createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); - alpaka::exec( - queue, checkHitspLS_workDiv, ::lst::CheckHitspLS{}, *modulesBuffers_.data(), 
*segmentsInGPU, false); + alpaka::exec(queue, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU, false); } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { +void Event::createPixelQuintuplets() { if (pixelQuintupletsInGPU == nullptr) { - pixelQuintupletsInGPU = new ::lst::PixelQuintuplets(); - pixelQuintupletsBuffers = new ::lst::PixelQuintupletsBuffer(::lst::n_max_pixel_quintuplets, devAcc, queue); + pixelQuintupletsInGPU = new PixelQuintuplets(); + pixelQuintupletsBuffers = new PixelQuintupletsBuffer(n_max_pixel_quintuplets, devAcc, queue); pixelQuintupletsInGPU->setData(*pixelQuintupletsBuffers); } if (trackCandidatesInGPU == nullptr) { - trackCandidatesInGPU = new ::lst::TrackCandidates(); - trackCandidatesBuffers = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devAcc, queue); + trackCandidatesInGPU = new TrackCandidates(); + trackCandidatesBuffers = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } - auto superbins_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); - auto pixelTypes_buf = ::lst::allocBufWrapper(devHost, ::lst::n_max_pixel_segments_per_module, queue); + auto superbins_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); + auto pixelTypes_buf = allocBufWrapper(devHost, n_max_pixel_segments_per_module, queue); alpaka::memcpy(queue, superbins_buf, segmentsBuffers->superbin_buf); alpaka::memcpy(queue, pixelTypes_buf, segmentsBuffers->pixelType_buf); @@ -921,24 +916,24 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, nInnerSegments_src_view, dev_view_nSegments); alpaka::wait(queue); // wait to get nInnerSegments (also superbins and pixelTypes) before using - auto connectedPixelSize_host_buf = 
::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelIndex_host_buf = ::lst::allocBufWrapper(devHost, nInnerSegments, queue); - auto connectedPixelSize_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); - auto connectedPixelIndex_dev_buf = ::lst::allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelSize_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelIndex_host_buf = allocBufWrapper(devHost, nInnerSegments, queue); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc, nInnerSegments, queue); auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); - int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizes[::lst::size_superbins - 1]; - int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::lst::size_superbins - 1] + - pixelMapping_.connectedPixelsSizesPos[::lst::size_superbins - 1] + pixelIndexOffsetPos; + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::size_superbins - 1] + pixelIndexOffsetPos; // Loop over # pLS for (unsigned int i = 0; i < nInnerSegments; i++) { int8_t pixelType = pixelTypes[i]; // Get pixel type for this pLS int superbin = superbins[i]; // Get superbin for this pixel - if ((superbin < 0) or (superbin >= (int)::lst::size_superbins) or (pixelType > 2) or (pixelType < 0)) { + if ((superbin < 0) or (superbin >= (int)::size_superbins) or (pixelType > 2) or (pixelType < 0)) { connectedPixelIndex_host[i] = 0; connectedPixelSize_host[i] = 0; continue; @@ -964,13 +959,13 @@ void 
ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { alpaka::memcpy(queue, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; - Vec3D const blocksPerGridCreatePixQuints{16, ::lst::max_blocks, 1}; + Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv = - ::lst::createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); alpaka::exec(queue, createPixelQuintupletsInGPUFromMapv2_workDiv, - ::lst::CreatePixelQuintupletsInGPUFromMapv2{}, + CreatePixelQuintupletsInGPUFromMapv2{}, *modulesBuffers_.data(), *mdsInGPU, *segmentsInGPU, @@ -983,20 +978,20 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { *rangesInGPU); Vec3D const threadsPerBlockDupPix{1, 16, 16}; - Vec3D const blocksPerGridDupPix{1, ::lst::max_blocks, 1}; + Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv = - ::lst::createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, ::lst::elementsPerThread); + createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); alpaka::exec(queue, removeDupPixelQuintupletsInGPUFromMap_workDiv, - ::lst::RemoveDupPixelQuintupletsInGPUFromMap{}, + RemoveDupPixelQuintupletsInGPUFromMap{}, *pixelQuintupletsInGPU); - WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = ::lst::createWorkDiv({1}, {256}, {1}); + WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1}); alpaka::exec(queue, addpT5asTrackCandidateInGPU_workDiv, - ::lst::AddpT5asTrackCandidateInGPU{}, + AddpT5asTrackCandidateInGPU{}, nLowerModules_, *pixelQuintupletsInGPU, *trackCandidatesInGPU, @@ -1004,7 +999,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() 
{ *rangesInGPU); #ifdef WARNINGS - auto nPixelQuintuplets_buf = ::lst::allocBufWrapper(devHost, 1, queue); + auto nPixelQuintuplets_buf = allocBufWrapper(devHost, 1, queue); alpaka::memcpy(queue, nPixelQuintuplets_buf, pixelQuintupletsBuffers->nPixelQuintuplets_buf); alpaka::wait(queue); // wait to get the value before using it @@ -1013,18 +1008,18 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::createPixelQuintuplets() { #endif } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addMiniDoubletsToEventExplicit() { - auto nMDsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addMiniDoubletsToEventExplicit() { + auto nMDsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_hitRanges_buf = ::lst::allocBufWrapper(devHost, nLowerModules_ * 2, queue); + auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules_ * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules_ * 2u); alpaka::wait(queue); // wait for inputs before using them @@ -1045,15 +1040,15 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addMiniDoubletsToEventExplicit() } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addSegmentsToEventExplicit() { - auto nSegmentsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addSegmentsToEventExplicit() { + auto nSegmentsCPU_buf = 
allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules_); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1073,18 +1068,18 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addSegmentsToEventExplicit() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addQuintupletsToEventExplicit() { - auto nQuintupletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addQuintupletsToEventExplicit() { + auto nQuintupletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); - auto module_quintupletModuleIndices_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, 
rangesBuffers->quintupletModuleIndices_buf); alpaka::wait(queue); // wait for inputs before using them @@ -1105,15 +1100,15 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addQuintupletsToEventExplicit() { } } -void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addTripletsToEventExplicit() { - auto nTripletsCPU_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); +void Event::addTripletsToEventExplicit() { + auto nTripletsCPU_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf); // FIXME: replace by ES host data - auto module_subdets_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_); - auto module_layers_buf = ::lst::allocBufWrapper(devHost, nLowerModules_, queue); + auto module_layers_buf = allocBufWrapper(devHost, nLowerModules_, queue); alpaka::memcpy(queue, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_); alpaka::wait(queue); // wait for inputs before using them @@ -1133,7 +1128,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::addTripletsToEventExplicit() { } } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHits() { +unsigned int Event::getNumberOfHits() { unsigned int hits = 0; for (auto& it : n_hits_by_layer_barrel_) { hits += it; @@ -1145,22 +1140,18 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHits() { return hits; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfHitsByLayer(unsigned int layer) { if (layer == 6) return n_hits_by_layer_barrel_[layer]; else return n_hits_by_layer_barrel_[layer] + n_hits_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { - return 
n_hits_by_layer_barrel_[layer]; -} +unsigned int Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { return n_hits_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { - return n_hits_by_layer_endcap_[layer]; -} +unsigned int Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { return n_hits_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoublets() { +unsigned int Event::getNumberOfMiniDoublets() { unsigned int miniDoublets = 0; for (auto& it : n_minidoublets_by_layer_barrel_) { miniDoublets += it; @@ -1172,22 +1163,22 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoublets() return miniDoublets; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) { if (layer == 6) return n_minidoublets_by_layer_barrel_[layer]; else return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { +unsigned int Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { return n_minidoublets_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { +unsigned int Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { return n_minidoublets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegments() { +unsigned int Event::getNumberOfSegments() { unsigned int segments = 0; for (auto& it : n_segments_by_layer_barrel_) { segments += it; @@ -1199,22 +1190,18 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegments() { return segments; } -unsigned int 
ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfSegmentsByLayer(unsigned int layer) { if (layer == 6) return n_segments_by_layer_barrel_[layer]; else return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { - return n_segments_by_layer_barrel_[layer]; -} +unsigned int Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { return n_segments_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { - return n_segments_by_layer_endcap_[layer]; -} +unsigned int Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { return n_segments_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTriplets() { +unsigned int Event::getNumberOfTriplets() { unsigned int triplets = 0; for (auto& it : n_triplets_by_layer_barrel_) { triplets += it; @@ -1226,22 +1213,18 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTriplets() { return triplets; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfTripletsByLayer(unsigned int layer) { if (layer == 6) return n_triplets_by_layer_barrel_[layer]; else return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { - return n_triplets_by_layer_barrel_[layer]; -} +unsigned int Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { return n_triplets_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { - return n_triplets_by_layer_endcap_[layer]; -} +unsigned int 
Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { return n_triplets_by_layer_endcap_[layer]; } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTriplets() { +int Event::getNumberOfPixelTriplets() { auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelTriplets_buf_h, pixelTripletsBuffers->nPixelTriplets_buf); @@ -1249,7 +1232,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTriplets() { return *nPixelTriplets_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelQuintuplets() { +int Event::getNumberOfPixelQuintuplets() { auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers->nPixelQuintuplets_buf); @@ -1257,7 +1240,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelQuintuplets() { return *nPixelQuintuplets_buf_h.data(); } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintuplets() { +unsigned int Event::getNumberOfQuintuplets() { unsigned int quintuplets = 0; for (auto& it : n_quintuplets_by_layer_barrel_) { quintuplets += it; @@ -1269,22 +1252,22 @@ unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintuplets() return quintuplets; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayer(unsigned int layer) { +unsigned int Event::getNumberOfQuintupletsByLayer(unsigned int layer) { if (layer == 6) return n_quintuplets_by_layer_barrel_[layer]; else return n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { +unsigned int Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { return n_quintuplets_by_layer_barrel_[layer]; } -unsigned int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfQuintupletsByLayerEndcap(unsigned int 
layer) { +unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { return n_quintuplets_by_layer_endcap_[layer]; } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTrackCandidates() { +int Event::getNumberOfTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidates_buf_h, trackCandidatesBuffers->nTrackCandidates_buf); @@ -1292,7 +1275,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfTrackCandidates() { return *nTrackCandidates_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT5TrackCandidates() { +int Event::getNumberOfPT5TrackCandidates() { auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers->nTrackCandidatespT5_buf); @@ -1301,7 +1284,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT5TrackCandidates() { return *nTrackCandidatesPT5_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT3TrackCandidates() { +int Event::getNumberOfPT3TrackCandidates() { auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers->nTrackCandidatespT3_buf); @@ -1309,7 +1292,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPT3TrackCandidates() { return *nTrackCandidatesPT3_buf_h.data(); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPLSTrackCandidates() { +int Event::getNumberOfPLSTrackCandidates() { auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers->nTrackCandidatespLS_buf); @@ -1317,7 +1300,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPLSTrackCandidates() { return *nTrackCandidatesPLS_buf_h.data(); } -int 
ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTrackCandidates() { +int Event::getNumberOfPixelTrackCandidates() { auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1327,7 +1310,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfPixelTrackCandidates() return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); } -int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfT5TrackCandidates() { +int Event::getNumberOfT5TrackCandidates() { auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nTrackCandidatesT5_buf_h, trackCandidatesBuffers->nTrackCandidatesT5_buf); @@ -1335,8 +1318,7 @@ int ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getNumberOfT5TrackCandidates() { return *nTrackCandidatesT5_buf_h.data(); } -lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHits( - bool sync) //std::shared_ptr should take care of garbage collection +HitsBuffer* Event::getHits(bool sync) //std::shared_ptr should take care of garbage collection { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1344,7 +1326,7 @@ lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Even alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1360,14 +1342,14 @@ lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Even return hitsInCPU; } -lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getHitsInCMSSW(bool sync) { +HitsBuffer* Event::getHitsInCMSSW(bool sync) { if (hitsInCPU == nullptr) { auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); alpaka::memcpy(queue, nHits_buf_h, 
hitsBuffers->nHits_buf); alpaka::wait(queue); // wait for the value before using auto const nHits = *nHits_buf_h.data(); - hitsInCPU = new ::lst::HitsBuffer(nModules_, nHits, devHost, queue); + hitsInCPU = new HitsBuffer(nModules_, nHits, devHost, queue); hitsInCPU->setData(*hitsInCPU); *hitsInCPU->nHits_buf.data() = nHits; @@ -1378,9 +1360,9 @@ lst::HitsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Even return hitsInCPU; } -lst::ObjectRangesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getRanges(bool sync) { +ObjectRangesBuffer* Event::getRanges(bool sync) { if (rangesInCPU == nullptr) { - rangesInCPU = new ::lst::ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); + rangesInCPU = new ObjectRangesBuffer(nModules_, nLowerModules_, devHost, queue); rangesInCPU->setData(*rangesInCPU); alpaka::memcpy(queue, rangesInCPU->hitRanges_buf, rangesBuffers->hitRanges_buf); @@ -1394,7 +1376,7 @@ lst::ObjectRangesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::l return rangesInCPU; } -lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getMiniDoublets(bool sync) { +MiniDoubletsBuffer* Event::getMiniDoublets(bool sync) { if (mdsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based mdsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1402,7 +1384,7 @@ lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::l alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - mdsInCPU = new ::lst::MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); + mdsInCPU = new MiniDoubletsBuffer(nMemHost, nLowerModules_, devHost, queue); mdsInCPU->setData(*mdsInCPU); *mdsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1417,7 +1399,7 @@ lst::MiniDoubletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::l return mdsInCPU; } -lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getSegments(bool sync) { +SegmentsBuffer* Event::getSegments(bool sync) { if (segmentsInCPU == nullptr) 
{ // Get nMemoryLocations parameter to initialize host based segmentsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1425,8 +1407,8 @@ lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - segmentsInCPU = new ::lst::SegmentsBuffer( - nMemHost, nLowerModules_, ::lst::n_max_pixel_segments_per_module, devHost, queue); + segmentsInCPU = + new SegmentsBuffer(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, devHost, queue); segmentsInCPU->setData(*segmentsInCPU); *segmentsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1454,7 +1436,7 @@ lst::SegmentsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: return segmentsInCPU; } -lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTriplets(bool sync) { +TripletsBuffer* Event::getTriplets(bool sync) { if (tripletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based tripletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1462,7 +1444,7 @@ lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - tripletsInCPU = new ::lst::TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); + tripletsInCPU = new TripletsBuffer(nMemHost, nLowerModules_, devHost, queue); tripletsInCPU->setData(*tripletsInCPU); *tripletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1477,12 +1459,9 @@ lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: alpaka::memcpy(queue, tripletsInCPU->rtLo_buf, tripletsBuffers->rtLo_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->rtHi_buf, tripletsBuffers->rtHi_buf, nMemHost); #endif + alpaka::memcpy(queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, Params_T3::kHits * nMemHost); alpaka::memcpy( - queue, tripletsInCPU->hitIndices_buf, tripletsBuffers->hitIndices_buf, 
::lst::Params_T3::kHits * nMemHost); - alpaka::memcpy(queue, - tripletsInCPU->logicalLayers_buf, - tripletsBuffers->logicalLayers_buf, - ::lst::Params_T3::kLayers * nMemHost); + queue, tripletsInCPU->logicalLayers_buf, tripletsBuffers->logicalLayers_buf, Params_T3::kLayers * nMemHost); alpaka::memcpy(queue, tripletsInCPU->segmentIndices_buf, tripletsBuffers->segmentIndices_buf, 2 * nMemHost); alpaka::memcpy(queue, tripletsInCPU->betaIn_buf, tripletsBuffers->betaIn_buf, nMemHost); alpaka::memcpy(queue, tripletsInCPU->circleRadius_buf, tripletsBuffers->circleRadius_buf, nMemHost); @@ -1494,7 +1473,7 @@ lst::TripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst:: return tripletsInCPU; } -lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getQuintuplets(bool sync) { +QuintupletsBuffer* Event::getQuintuplets(bool sync) { if (quintupletsInCPU == nullptr) { // Get nMemoryLocations parameter to initialize host based quintupletsInCPU auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1502,7 +1481,7 @@ lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::ls alpaka::wait(queue); // wait for the value before using auto const nMemHost = *nMemHost_buf_h.data(); - quintupletsInCPU = new ::lst::QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); + quintupletsInCPU = new QuintupletsBuffer(nMemHost, nLowerModules_, devHost, queue); quintupletsInCPU->setData(*quintupletsInCPU); *quintupletsInCPU->nMemoryLocations_buf.data() = nMemHost; @@ -1513,7 +1492,7 @@ lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::ls alpaka::memcpy(queue, quintupletsInCPU->lowerModuleIndices_buf, quintupletsBuffers->lowerModuleIndices_buf, - ::lst::Params_T5::kLayers * nMemHost); + Params_T5::kLayers * nMemHost); alpaka::memcpy(queue, quintupletsInCPU->innerRadius_buf, quintupletsBuffers->innerRadius_buf, nMemHost); alpaka::memcpy(queue, quintupletsInCPU->bridgeRadius_buf, quintupletsBuffers->bridgeRadius_buf, nMemHost); alpaka::memcpy(queue, 
quintupletsInCPU->outerRadius_buf, quintupletsBuffers->outerRadius_buf, nMemHost); @@ -1531,7 +1510,7 @@ lst::QuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::ls return quintupletsInCPU; } -lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelTriplets(bool sync) { +PixelTripletsBuffer* Event::getPixelTriplets(bool sync) { if (pixelTripletsInCPU == nullptr) { // Get nPixelTriplets parameter to initialize host based quintupletsInCPU auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1539,7 +1518,7 @@ lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE:: alpaka::wait(queue); // wait for the value before using auto const nPixelTriplets = *nPixelTriplets_buf_h.data(); - pixelTripletsInCPU = new ::lst::PixelTripletsBuffer(nPixelTriplets, devHost, queue); + pixelTripletsInCPU = new PixelTripletsBuffer(nPixelTriplets, devHost, queue); pixelTripletsInCPU->setData(*pixelTripletsInCPU); *pixelTripletsInCPU->nPixelTriplets_buf.data() = nPixelTriplets; @@ -1571,8 +1550,7 @@ lst::PixelTripletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE:: return pixelTripletsInCPU; } -lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getPixelQuintuplets( - bool sync) { +PixelQuintupletsBuffer* Event::getPixelQuintuplets(bool sync) { if (pixelQuintupletsInCPU == nullptr) { // Get nPixelQuintuplets parameter to initialize host based quintupletsInCPU auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1580,7 +1558,7 @@ lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPAC alpaka::wait(queue); // wait for the value before using auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data(); - pixelQuintupletsInCPU = new ::lst::PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); + pixelQuintupletsInCPU = new PixelQuintupletsBuffer(nPixelQuintuplets, devHost, queue); pixelQuintupletsInCPU->setData(*pixelQuintupletsInCPU); *pixelQuintupletsInCPU->nPixelQuintuplets_buf.data() = nPixelQuintuplets; 
@@ -1609,8 +1587,7 @@ lst::PixelQuintupletsBuffer* ALPAKA_ACCELERATOR_NAMESPAC return pixelQuintupletsInCPU; } -lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidates( - bool sync) { +TrackCandidatesBuffer* Event::getTrackCandidates(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1618,21 +1595,21 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE alpaka::wait(queue); auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - ::lst::Params_pT5::kHits * nTrackCanHost); + Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->logicalLayers_buf, trackCandidatesBuffers->logicalLayers_buf, - ::lst::Params_pT5::kLayers * nTrackCanHost); + Params_pT5::kLayers * nTrackCanHost); alpaka::memcpy(queue, trackCandidatesInCPU->directObjectIndices_buf, trackCandidatesBuffers->directObjectIndices_buf, @@ -1649,8 +1626,7 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE return trackCandidatesInCPU; } -lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getTrackCandidatesInCMSSW( - bool sync) { +TrackCandidatesBuffer* Event::getTrackCandidatesInCMSSW(bool sync) { if (trackCandidatesInCPU == nullptr) { // Get 
nTrackCanHost parameter to initialize host based trackCandidatesInCPU auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue, 1u); @@ -1658,15 +1634,15 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE alpaka::wait(queue); // wait for the value before using auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); - trackCandidatesInCPU = new ::lst::TrackCandidatesBuffer( - ::lst::n_max_nonpixel_track_candidates + ::lst::n_max_pixel_track_candidates, devHost, queue); + trackCandidatesInCPU = new TrackCandidatesBuffer( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devHost, queue); trackCandidatesInCPU->setData(*trackCandidatesInCPU); *trackCandidatesInCPU->nTrackCandidates_buf.data() = nTrackCanHost; alpaka::memcpy(queue, trackCandidatesInCPU->hitIndices_buf, trackCandidatesBuffers->hitIndices_buf, - ::lst::Params_pT5::kHits * nTrackCanHost); + Params_pT5::kHits * nTrackCanHost); alpaka::memcpy( queue, trackCandidatesInCPU->pixelSeedIndex_buf, trackCandidatesBuffers->pixelSeedIndex_buf, nTrackCanHost); alpaka::memcpy(queue, @@ -1679,11 +1655,10 @@ lst::TrackCandidatesBuffer* ALPAKA_ACCELERATOR_NAMESPACE return trackCandidatesInCPU; } -lst::ModulesBuffer* ALPAKA_ACCELERATOR_NAMESPACE::lst::Event::getModules(bool isFull, - bool sync) { +ModulesBuffer* Event::getModules(bool isFull, bool sync) { if (modulesInCPU == nullptr) { // The last input here is just a small placeholder for the allocation. 
- modulesInCPU = new ::lst::ModulesBuffer(devHost, nModules_, nPixels_); + modulesInCPU = new ModulesBuffer(devHost, nModules_, nPixels_); modulesInCPU->copyFromSrc(queue, modulesBuffers_, isFull); if (sync) diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h index 3c3549f96d41e..2ad8e150ece88 100644 --- a/RecoTracker/LSTCore/src/alpaka/Event.h +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -2,8 +2,8 @@ #define RecoTracker_LSTCore_src_alpaka_Event_h #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "RecoTracker/LSTCore/interface/Module.h" -#include "RecoTracker/LSTCore/interface/LST.h" #include "Hit.h" #include "Segment.h" @@ -17,6 +17,11 @@ #include "HeterogeneousCore/AlpakaInterface/interface/host.h" +using ::lst::EndcapGeometryBuffer; +using ::lst::LSTESData; +using ::lst::ModulesBuffer; +using ::lst::PixelMap; + namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace lst { @@ -42,36 +47,36 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { unsigned int nTotalSegments_; //Device stuff - ::lst::ObjectRanges* rangesInGPU; - ::lst::ObjectRangesBuffer* rangesBuffers; - ::lst::Hits* hitsInGPU; - ::lst::HitsBuffer* hitsBuffers; - ::lst::MiniDoublets* mdsInGPU; - ::lst::MiniDoubletsBuffer* miniDoubletsBuffers; - ::lst::Segments* segmentsInGPU; - ::lst::SegmentsBuffer* segmentsBuffers; - ::lst::Triplets* tripletsInGPU; - ::lst::TripletsBuffer* tripletsBuffers; - ::lst::Quintuplets* quintupletsInGPU; - ::lst::QuintupletsBuffer* quintupletsBuffers; - ::lst::TrackCandidates* trackCandidatesInGPU; - ::lst::TrackCandidatesBuffer* trackCandidatesBuffers; - ::lst::PixelTriplets* pixelTripletsInGPU; - ::lst::PixelTripletsBuffer* pixelTripletsBuffers; - ::lst::PixelQuintuplets* pixelQuintupletsInGPU; - ::lst::PixelQuintupletsBuffer* pixelQuintupletsBuffers; + ObjectRanges* rangesInGPU; + ObjectRangesBuffer* rangesBuffers; + Hits* hitsInGPU; + HitsBuffer* hitsBuffers; + 
MiniDoublets* mdsInGPU; + MiniDoubletsBuffer* miniDoubletsBuffers; + Segments* segmentsInGPU; + SegmentsBuffer* segmentsBuffers; + Triplets* tripletsInGPU; + TripletsBuffer* tripletsBuffers; + Quintuplets* quintupletsInGPU; + QuintupletsBuffer* quintupletsBuffers; + TrackCandidates* trackCandidatesInGPU; + TrackCandidatesBuffer* trackCandidatesBuffers; + PixelTriplets* pixelTripletsInGPU; + PixelTripletsBuffer* pixelTripletsBuffers; + PixelQuintuplets* pixelQuintupletsInGPU; + PixelQuintupletsBuffer* pixelQuintupletsBuffers; //CPU interface stuff - ::lst::ObjectRangesBuffer* rangesInCPU; - ::lst::HitsBuffer* hitsInCPU; - ::lst::MiniDoubletsBuffer* mdsInCPU; - ::lst::SegmentsBuffer* segmentsInCPU; - ::lst::TripletsBuffer* tripletsInCPU; - ::lst::TrackCandidatesBuffer* trackCandidatesInCPU; - ::lst::ModulesBuffer* modulesInCPU; - ::lst::QuintupletsBuffer* quintupletsInCPU; - ::lst::PixelTripletsBuffer* pixelTripletsInCPU; - ::lst::PixelQuintupletsBuffer* pixelQuintupletsInCPU; + ObjectRangesBuffer* rangesInCPU; + HitsBuffer* hitsInCPU; + MiniDoubletsBuffer* mdsInCPU; + SegmentsBuffer* segmentsInCPU; + TripletsBuffer* tripletsInCPU; + TrackCandidatesBuffer* trackCandidatesInCPU; + ModulesBuffer* modulesInCPU; + QuintupletsBuffer* quintupletsInCPU; + PixelTripletsBuffer* pixelTripletsInCPU; + PixelQuintupletsBuffer* pixelQuintupletsInCPU; void initSync(bool verbose); @@ -82,13 +87,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const uint16_t nLowerModules_; const unsigned int nPixels_; const unsigned int nEndCapMap_; - ::lst::ModulesBuffer const& modulesBuffers_; - ::lst::PixelMap const& pixelMapping_; - ::lst::EndcapGeometryBuffer const& endcapGeometryBuffers_; + ModulesBuffer const& modulesBuffers_; + PixelMap const& pixelMapping_; + EndcapGeometryBuffer const& endcapGeometryBuffers_; public: // Constructor used for CMSSW integration. Uses an external queue. 
- Event(bool verbose, Queue const& q, const ::lst::LSTESData* deviceESData) + Event(bool verbose, Queue const& q, const LSTESData* deviceESData) : queue(q), devAcc(alpaka::getDev(q)), devHost(cms::alpakatools::host()), @@ -184,18 +189,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // (has no effect on repeated calls) // set to false may allow faster operation with concurrent calls of get* // HANDLE WITH CARE - ::lst::HitsBuffer* getHits(bool sync = true); - ::lst::HitsBuffer* getHitsInCMSSW(bool sync = true); - ::lst::ObjectRangesBuffer* getRanges(bool sync = true); - ::lst::MiniDoubletsBuffer* getMiniDoublets(bool sync = true); - ::lst::SegmentsBuffer* getSegments(bool sync = true); - ::lst::TripletsBuffer* getTriplets(bool sync = true); - ::lst::QuintupletsBuffer* getQuintuplets(bool sync = true); - ::lst::PixelTripletsBuffer* getPixelTriplets(bool sync = true); - ::lst::PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); - ::lst::TrackCandidatesBuffer* getTrackCandidates(bool sync = true); - ::lst::TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); - ::lst::ModulesBuffer* getModules(bool isFull = false, bool sync = true); + HitsBuffer* getHits(bool sync = true); + HitsBuffer* getHitsInCMSSW(bool sync = true); + ObjectRangesBuffer* getRanges(bool sync = true); + MiniDoubletsBuffer* getMiniDoublets(bool sync = true); + SegmentsBuffer* getSegments(bool sync = true); + TripletsBuffer* getTriplets(bool sync = true); + QuintupletsBuffer* getQuintuplets(bool sync = true); + PixelTripletsBuffer* getPixelTriplets(bool sync = true); + PixelQuintupletsBuffer* getPixelQuintuplets(bool sync = true); + TrackCandidatesBuffer* getTrackCandidates(bool sync = true); + TrackCandidatesBuffer* getTrackCandidatesInCMSSW(bool sync = true); + ModulesBuffer* getModules(bool isFull = false, bool sync = true); }; } // namespace lst diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h index cb95aa14538f3..1a54008d4331c 100644 
--- a/RecoTracker/LSTCore/src/alpaka/Hit.h +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -4,7 +4,9 @@ #include "RecoTracker/LSTCore/interface/alpaka/Constants.h" #include "RecoTracker/LSTCore/interface/Module.h" -namespace lst { +using ::lst::Modules; + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Hits { unsigned int* nHits; float* xs; @@ -178,10 +180,7 @@ namespace lst { struct ModuleRangesKernel { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Hits hitsInGPU, - int nLowerModules) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, int nLowerModules) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -208,8 +207,8 @@ namespace lst { unsigned int nEndCapMap, // Number of elements in endcap map const unsigned int* geoMapDetId, // DetId's from endcap map const float* geoMapPhi, // Phi values from endcap map - lst::Modules modulesInGPU, - lst::Hits hitsInGPU, + Modules modulesInGPU, + Hits hitsInGPU, unsigned int nHits) const // Total number of hits in event { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -221,7 +220,7 @@ namespace lst { int iDetId = hitsInGPU.detid[ihit]; hitsInGPU.rts[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); - hitsInGPU.phis[ihit] = lst::phi(acc, ihit_x, ihit_y); + hitsInGPU.phis[ihit] = phi(acc, ihit_x, ihit_y); hitsInGPU.etas[ihit] = ((ihit_z > 0) - (ihit_z < 0)) * alpaka::math::acosh( @@ -255,5 +254,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 31f057017a766..bc284d052cc05 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -13,24 +13,24 @@ #include "PixelQuintuplet.h" #include "PixelTriplet.h" -namespace lst { - ALPAKA_FN_ACC 
ALPAKA_FN_INLINE void rmQuintupletFromMemory(lst::Quintuplets& quintupletsInGPU, +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets& quintupletsInGPU, unsigned int quintupletIndex, bool secondpass = false) { quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(lst::PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets& pixelTripletsInGPU, unsigned int pixelTripletIndex) { pixelTripletsInGPU.isDup[pixelTripletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(lst::PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelQuintupletIndex) { pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(lst::Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(Segments& segmentsInGPU, unsigned int pixelSegmentArrayIndex, bool secondpass = false) { segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; @@ -38,7 +38,7 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, - lst::Quintuplets const& quintupletsInGPU) { + Quintuplets const& quintupletsInGPU) { unsigned int hits1[Params_T5::kHits]; unsigned int hits2[Params_T5::kHits]; @@ -65,7 +65,7 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, unsigned int jx, - lst::PixelQuintuplets const& pixelQuintupletsInGPU) { + PixelQuintuplets const& pixelQuintupletsInGPU) { unsigned int hits1[Params_pT5::kHits]; unsigned int hits2[Params_pT5::kHits]; @@ -92,7 +92,7 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, unsigned int jx, - lst::PixelTriplets const& pixelTripletsInGPU, + 
PixelTriplets const& pixelTripletsInGPU, int* matched) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; @@ -145,9 +145,9 @@ namespace lst { struct RemoveDupQuintupletsInGPUAfterBuild { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -168,7 +168,7 @@ namespace lst { float eta2 = __H2F(quintupletsInGPU.eta[jx]); float phi2 = __H2F(quintupletsInGPU.phi[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); if (dEta > 0.1f) @@ -194,9 +194,7 @@ namespace lst { struct RemoveDupQuintupletsInGPUBeforeTC { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -240,7 +238,7 @@ namespace lst { float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); if (dEta > 0.1f) continue; @@ -269,7 +267,7 @@ namespace lst { struct RemoveDupPixelTripletsInGPUFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::PixelTriplets pixelTripletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTripletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ 
-306,7 +304,7 @@ namespace lst { struct RemoveDupPixelQuintupletsInGPUFromMap { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::PixelQuintuplets pixelQuintupletsInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -333,10 +331,7 @@ namespace lst { struct CheckHitspLS { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Segments segmentsInGPU, - bool secondpass) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Segments segmentsInGPU, bool secondpass) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -411,7 +406,7 @@ namespace lst { } if (secondpass) { float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); - float dPhi = lst::calculate_dPhi(phi_pix1, phi_pix2); + float dPhi = calculate_dPhi(phi_pix1, phi_pix2); float dR2 = dEta * dEta + dPhi * dPhi; if ((npMatched >= 1) || (dR2 < 1e-5f)) { @@ -422,5 +417,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc index e3e9909045a6d..e847eb892af8c 100644 --- a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -1,4 +1,4 @@ -#include "RecoTracker/LSTCore/interface/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" #include "Event.h" @@ -219,16 +219,16 @@ std::vector ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getHitIdxs(sho unsigned int maxNHits = 0; if (trackCandidateType == 7) - maxNHits = ::lst::Params_pT5::kHits; // pT5 + maxNHits = Params_pT5::kHits; // pT5 else if (trackCandidateType == 5) - maxNHits = ::lst::Params_pT3::kHits; // pT3 + maxNHits = Params_pT3::kHits; // pT3 else if (trackCandidateType == 
4) - maxNHits = ::lst::Params_T5::kHits; // T5 + maxNHits = Params_T5::kHits; // T5 else if (trackCandidateType == 8) - maxNHits = ::lst::Params_pLS::kHits; // pLS + maxNHits = Params_pLS::kHits; // pLS for (unsigned int i = 0; i < maxNHits; i++) { - unsigned int hitIdxInGPU = TCHitIndices[::lst::Params_pT5::kHits * TCIdx + i]; + unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; unsigned int hitIdx = (trackCandidateType == 8) ? hitIdxInGPU @@ -252,8 +252,8 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getOutput(ALPAKA_ACCELERATOR_NAMESP std::vector tc_seedIdx; std::vector tc_trackCandidateType; - ::lst::HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line - ::lst::TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); + HitsBuffer& hitsInGPU = (*event.getHitsInCMSSW(false)); // sync on next line + TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; @@ -276,7 +276,7 @@ void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::getOutput(ALPAKA_ACCELERATOR_NAMESP void ALPAKA_ACCELERATOR_NAMESPACE::lst::LST::run(Queue& queue, bool verbose, - ::lst::LSTESData const* deviceESData, + LSTESData const* deviceESData, std::vector const& see_px, std::vector const& see_py, std::vector const& see_pz, diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 469c79233f9e0..335ceeea2ab79 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -10,7 +10,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct MiniDoublets { unsigned int* nMemoryLocations; @@ -189,9 +189,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, - lst::MiniDoublets& mdsInGPU, - lst::Hits const& hitsInGPU, - lst::Modules const& 
modulesInGPU, + MiniDoublets& mdsInGPU, + Hits const& hitsInGPU, + Modules const& modulesInGPU, unsigned int lowerHitIdx, unsigned int upperHitIdx, uint16_t lowerModuleIdx, @@ -209,7 +209,8 @@ namespace lst { mdsInGPU.moduleIndices[idx] = lowerModuleIdx; unsigned int anchorHitIndex, outerHitIndex; - if (modulesInGPU.moduleType[lowerModuleIdx] == PS and modulesInGPU.moduleLayerType[lowerModuleIdx] == Strip) { + if (modulesInGPU.moduleType[lowerModuleIdx] == ::lst::PS and + modulesInGPU.moduleLayerType[lowerModuleIdx] == ::lst::Strip) { mdsInGPU.anchorHitIndices[idx] = upperHitIdx; mdsInGPU.outerHitIndices[idx] = lowerHitIdx; @@ -261,7 +262,7 @@ namespace lst { mdsInGPU.outerLowEdgeY[idx] = hitsInGPU.lowEdgeYs[outerHitIndex]; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(Modules const& modulesInGPU, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf @@ -270,10 +271,10 @@ namespace lst { short side = modulesInGPU.sides[moduleIndex]; short rod = modulesInGPU.rods[moduleIndex]; - if (subdet == Barrel) { - if ((side != Center and layer == 3) or (side == NegZ and layer == 2 and rod > 5) or - (side == PosZ and layer == 2 and rod < 8) or (side == NegZ and layer == 1 and rod > 9) or - (side == PosZ and layer == 1 and rod < 4)) + if (subdet == ::lst::Barrel) { + if ((side != ::lst::Center and layer == 3) or (side == ::lst::NegZ and layer == 2 and rod > 5) or + (side == ::lst::PosZ and layer == 2 and rod < 8) or (side == ::lst::NegZ and layer == 1 and rod > 9) or + (side == ::lst::PosZ and layer == 1 and rod < 4)) return true; else return false; @@ -281,7 +282,7 @@ namespace lst { return false; } - ALPAKA_FN_ACC 
ALPAKA_FN_INLINE float moduleGapSize(lst::Modules const& modulesInGPU, uint16_t moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(Modules const& modulesInGPU, uint16_t moduleIndex) { float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -318,11 +319,11 @@ namespace lst { float moduleSeparation = 0; - if (subdet == Barrel and side == Center) { + if (subdet == ::lst::Barrel and side == ::lst::Center) { moduleSeparation = miniDeltaFlat[iL]; } else if (isTighterTiltedModules(modulesInGPU, moduleIndex)) { moduleSeparation = miniDeltaTilted[iL]; - } else if (subdet == Endcap) { + } else if (subdet == ::lst::Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; } else //Loose tilted modules { @@ -334,7 +335,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( - TAcc const& acc, float rt, lst::Modules const& modulesInGPU, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { + TAcc const& acc, float rt, Modules const& modulesInGPU, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { // ================================================================= // Various constants // ================================================================= @@ -347,16 +348,19 @@ namespace lst { unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); const float rLayNominal = - ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); + ((modulesInGPU.subdets[moduleIndex] == ::lst::Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); const float miniPVoff = 0.1f / rLayNominal; - const float miniMuls = ((modulesInGPU.subdets[moduleIndex] == Barrel) ? 
kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); - const bool isTilted = modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] != Center; + const float miniMuls = + ((modulesInGPU.subdets[moduleIndex] == ::lst::Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); + const bool isTilted = + modulesInGPU.subdets[moduleIndex] == ::lst::Barrel and modulesInGPU.sides[moduleIndex] != ::lst::Center; //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly float drdz; if (isTilted) { - if (modulesInGPU.moduleType[moduleIndex] == PS and modulesInGPU.moduleLayerType[moduleIndex] == Strip) { + if (modulesInGPU.moduleType[moduleIndex] == ::lst::PS and + modulesInGPU.moduleLayerType[moduleIndex] == ::lst::Strip) { drdz = modulesInGPU.drdzs[moduleIndex]; } else { drdz = modulesInGPU.drdzs[modulesInGPU.partnerModuleIndices[moduleIndex]]; @@ -375,12 +379,12 @@ namespace lst { // Return the threshold value // ================================================================= // Following condition is met if the module is central and flatly lying - if (modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] == Center) { + if (modulesInGPU.subdets[moduleIndex] == ::lst::Barrel and modulesInGPU.sides[moduleIndex] == ::lst::Center) { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); } // Following condition is met if the module is central and tilted - else if (modulesInGPU.subdets[moduleIndex] == Barrel and - modulesInGPU.sides[moduleIndex] != Center) //all types of tilted modules + else if (modulesInGPU.subdets[moduleIndex] == ::lst::Barrel and + modulesInGPU.sides[moduleIndex] != ::lst::Center) //all types of tilted modules { return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); @@ -393,7 +397,7 @@ 
namespace lst { template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -419,8 +423,8 @@ namespace lst { // lowerModule // lowerHit // upperHit - // lst::endcapGeometry - // lst::tiltedGeometry + // endcapGeometry + // tiltedGeometry // Some variables relevant to the function float xp; // pixel x (pixel hit x) @@ -449,10 +453,11 @@ namespace lst { float absdzprime; // The distance between the two points after shifting const float& drdz_ = modulesInGPU.drdzs[lowerModuleIndex]; // Assign hit pointers based on their hit type - if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS) { // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modulesInGPU.subdets[lowerModuleIndex] == Barrel ? modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel - : modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modulesInGPU.subdets[lowerModuleIndex] == ::lst::Barrel + ? modulesInGPU.moduleLayerType[lowerModuleIndex] != ::lst::Pixel + : modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { xo = xUpper; yo = yUpper; xp = xLower; @@ -477,7 +482,7 @@ namespace lst { } // If it is endcap some of the math gets simplified (and also computers don't like infinities) - isEndcap = modulesInGPU.subdets[lowerModuleIndex] == Endcap; + isEndcap = modulesInGPU.subdets[lowerModuleIndex] == ::lst::Endcap; // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos // I am not sure how slow sin, atan, cos, functions are in c++. 
If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time @@ -492,14 +497,15 @@ namespace lst { moduleSeparation = moduleGapSize(modulesInGPU, lowerModuleIndex); // Sign flips if the pixel is later layer - if (modulesInGPU.moduleType[lowerModuleIndex] == PS and modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel) { + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS and + modulesInGPU.moduleLayerType[lowerModuleIndex] != ::lst::Pixel) { moduleSeparation *= -1; } drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); // Compute arctan of the slope and take care of the slope = infinity case - absArctanSlope = ((slope != lst::lst_INF) ? fabs(alpaka::math::atan(acc, slope)) : float(M_PI) / 2.f); + absArctanSlope = ((slope != lst_INF) ? fabs(alpaka::math::atan(acc, slope)) : float(M_PI) / 2.f); // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently if (xp > 0 and yp > 0) { @@ -523,7 +529,7 @@ namespace lst { // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) if (slope == - lst::lst_INF) // Designated for tilted module when the slope is exactly infinity (module lying along y-axis) + lst_INF) // Designated for tilted module when the slope is exactly infinity (module lying along y-axis) { xn = xa; // New x point is simply where the anchor is yn = yo; // No shift in y @@ -544,7 +550,7 @@ namespace lst { angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { abszn = alpaka::math::abs(acc, zp) + absdzprime; } else { abszn = alpaka::math::abs(acc, zp) - absdzprime; @@ 
-557,81 +563,9 @@ namespace lst { shiftedCoords[2] = zn; } - template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& dz, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphiChange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper) { - if (modulesInGPU.subdets[lowerModuleIndex] == lst::Barrel) { - return runMiniDoubletDefaultAlgoBarrel(acc, - modulesInGPU, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - dz, - dPhi, - dPhiChange, - shiftedX, - shiftedY, - shiftedZ, - noShiftedDphi, - noShiftedDphiChange, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); - } else { - return runMiniDoubletDefaultAlgoEndcap(acc, - modulesInGPU, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - dz, - dPhi, - dPhiChange, - shiftedX, - shiftedY, - shiftedZ, - noShiftedDphi, - noShiftedDphiChange, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); - } - } - template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -653,7 +587,7 @@ namespace lst { float zUpper, float rtUpper) { dz = zLower - zUpper; - const float dzCut = modulesInGPU.moduleType[lowerModuleIndex] == lst::PS ? 2.f : 10.f; + const float dzCut = modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS ? 
2.f : 10.f; const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; @@ -662,7 +596,7 @@ namespace lst { float miniCut = 0; - miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel + miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel ? dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex) : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex); @@ -670,7 +604,7 @@ namespace lst { // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 float xn = 0.f, yn = 0.f; // , zn = 0; float shiftedRt2; - if (modulesInGPU.sides[lowerModuleIndex] != Center) // If barrel and not center it is tilted + if (modulesInGPU.sides[lowerModuleIndex] != ::lst::Center) // If barrel and not center it is tilted { // Shift the hits and calculate new xn, yn position float shiftedCoords[3]; @@ -693,27 +627,27 @@ namespace lst { yn = shiftedCoords[1]; // Lower or the upper hit needs to be modified depending on which one was actually shifted - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; shiftedRt2 = xn * xn + yn * yn; - dPhi = lst::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; shiftedRt2 = xn * xn + yn * yn; - dPhi = lst::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, 
xLower, yLower, xUpper, yUpper); } } else { shiftedX = 0; shiftedY = 0; shiftedZ = 0; - dPhi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); noShiftedDphi = dPhi; } @@ -722,34 +656,34 @@ namespace lst { // Cut #3: The dphi change going from lower Hit to upper Hit // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 - if (modulesInGPU.sides[lowerModuleIndex] != Center) { + if (modulesInGPU.sides[lowerModuleIndex] != ::lst::Center) { // When it is tilted, use the new shifted positions // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modulesInGPU.moduleLayerType[lowerModuleIndex] != lst::Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] != ::lst::Pixel) { // dPhi Change should be calculated so that the upper hit has higher rt. // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); - dPhiChange = (rtLower * rtLower < shiftedRt2) ? lst::deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) - : lst::deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); - noShiftedDphiChange = rtLower < rtUpper ? lst::deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : lst::deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + dPhiChange = (rtLower * rtLower < shiftedRt2) ? 
deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) + : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); } else { // dPhi Change should be calculated so that the upper hit has higher rt. // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) - dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? lst::deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) - : lst::deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); - noShiftedDphiChange = rtLower < rtUpper ? lst::deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : lst::deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); + noShiftedDphiChange = rtLower < rtUpper ? 
deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); } } else { // When it is flat lying module, whichever is the lowerSide will always have rt lower - dPhiChange = lst::deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); + dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); noShiftedDphiChange = dPhiChange; } @@ -758,7 +692,7 @@ namespace lst { template ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t lowerModuleIndex, uint16_t upperModuleIndex, unsigned int lowerHitIndex, @@ -792,7 +726,7 @@ namespace lst { return false; // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 - const float drtCut = modulesInGPU.moduleType[lowerModuleIndex] == lst::PS ? 2.f : 10.f; + const float drtCut = modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS ? 
2.f : 10.f; drt = rtLower - rtUpper; if (alpaka::math::abs(acc, drt) >= drtCut) return false; @@ -820,37 +754,37 @@ namespace lst { yn = shiftedCoords[1]; zn = shiftedCoords[2]; - if (modulesInGPU.moduleType[lowerModuleIndex] == lst::PS) { + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS) { // Appropriate lower or upper hit is modified after checking which one was actually shifted - if (modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel) { + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = lst::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; - dPhi = lst::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } } else { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = lst::deltaPhi(acc, xLower, yLower, xn, yn); - noShiftedDphi = lst::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dPhi = deltaPhi(acc, xLower, yLower, xn, yn); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); } // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" // if it was an endcap it will have zero effect - if (modulesInGPU.moduleType[lowerModuleIndex] == lst::PS) { - dz = modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel ? zLower - zn : zUpper - zn; + if (modulesInGPU.moduleType[lowerModuleIndex] == ::lst::PS) { + dz = modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel ? 
zLower - zn : zUpper - zn; } float miniCut = 0; - miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == lst::Pixel + miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == ::lst::Pixel ? dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex, dPhi, dz) : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex, dPhi, dz); @@ -867,13 +801,82 @@ namespace lst { return alpaka::math::abs(acc, dPhiChange) < miniCut; } + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + if (modulesInGPU.subdets[lowerModuleIndex] == ::lst::Barrel) { + return runMiniDoubletDefaultAlgoBarrel(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } else { + return runMiniDoubletDefaultAlgoEndcap(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } + } + struct CreateMiniDoubletsInGPUv2 { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Hits hitsInGPU, - lst::MiniDoublets mdsInGPU, - lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()( + TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, 
MiniDoublets mdsInGPU, ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -966,7 +969,7 @@ namespace lst { struct CreateMDArrayRangesGPU { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, lst::Modules modulesInGPU, lst::ObjectRanges rangesInGPU) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1060,11 +1063,8 @@ namespace lst { struct AddMiniDoubletRangesToEventExplicit { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::ObjectRanges rangesInGPU, - lst::Hits hitsInGPU) const { + ALPAKA_FN_ACC void operator()( + TAcc const& acc, Modules modulesInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU, Hits hitsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1083,5 +1083,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index b337b5f83f8ba..85b7b08dc075b 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -10,153 +10,156 @@ #include "Hit.h" #include "Triplet.h" -namespace lst::t5dnn { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, - const float* xVec, - const float* yVec, - const unsigned int* mdIndices, - const uint16_t* 
lowerModuleIndices, - unsigned int innerTripletIndex, - unsigned int outerTripletIndex, - float innerRadius, - float outerRadius, - float bridgeRadius) { - // Unpack x-coordinates of hits - float x1 = xVec[0]; - float x2 = xVec[1]; - float x3 = xVec[2]; - float x4 = xVec[3]; - float x5 = xVec[4]; - // Unpack y-coordinates of hits - float y1 = yVec[0]; - float y2 = yVec[1]; - float y3 = yVec[2]; - float y4 = yVec[3]; - float y5 = yVec[4]; - // Unpack module indices - unsigned int mdIndex1 = mdIndices[0]; - unsigned int mdIndex2 = mdIndices[1]; - unsigned int mdIndex3 = mdIndices[2]; - unsigned int mdIndex4 = mdIndices[3]; - unsigned int mdIndex5 = mdIndices[4]; - // Unpack module indices - uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; - uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; - uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; - uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; - uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; - // Compute some convenience variables - short layer2_adjustment = 0; - if (modulesInGPU.layers[lowerModuleIndex1] == 1) { - layer2_adjustment = 1; // get upper segment to be in second layer - } - unsigned int md_idx_for_t5_eta_phi = - segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; - bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap - bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap - bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap - bool is_endcap4 = (modulesInGPU.subdets[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap - bool is_endcap5 = (modulesInGPU.subdets[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap + namespace t5dnn { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets 
const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + const float* xVec, + const float* yVec, + const unsigned int* mdIndices, + const uint16_t* lowerModuleIndices, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float innerRadius, + float outerRadius, + float bridgeRadius) { + // Unpack x-coordinates of hits + float x1 = xVec[0]; + float x2 = xVec[1]; + float x3 = xVec[2]; + float x4 = xVec[3]; + float x5 = xVec[4]; + // Unpack y-coordinates of hits + float y1 = yVec[0]; + float y2 = yVec[1]; + float y3 = yVec[2]; + float y4 = yVec[3]; + float y5 = yVec[4]; + // Unpack module indices + unsigned int mdIndex1 = mdIndices[0]; + unsigned int mdIndex2 = mdIndices[1]; + unsigned int mdIndex3 = mdIndices[2]; + unsigned int mdIndex4 = mdIndices[3]; + unsigned int mdIndex5 = mdIndices[4]; + // Unpack module indices + uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; + uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; + uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; + uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; + uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; + // Compute some convenience variables + short layer2_adjustment = 0; + if (modulesInGPU.layers[lowerModuleIndex1] == 1) { + layer2_adjustment = 1; // get upper segment to be in second layer + } + unsigned int md_idx_for_t5_eta_phi = + segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; + bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap + bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap + bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap + bool is_endcap4 = (modulesInGPU.subdets[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap + bool is_endcap5 = (modulesInGPU.subdets[lowerModuleIndex5] == 4); // true if 
anchor hit 5 is in the endcap - // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) - float x[38] = { - alpaka::math::log10(acc, 2 * lst::k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) - mdsInGPU.anchorEta[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) - mdsInGPU.anchorPhi[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) - mdsInGPU.anchorZ[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) - alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) - float(modulesInGPU.layers[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) - mdsInGPU.anchorEta[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) - mdsInGPU.anchorPhi[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) - mdsInGPU.anchorZ[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) - alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) - float(modulesInGPU.layers[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) - mdsInGPU.anchorEta[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) - mdsInGPU.anchorPhi[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) - mdsInGPU.anchorZ[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) - alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) - float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) - alpaka::math::log10(acc, 2 * lst::k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) - mdsInGPU.anchorEta[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) - mdsInGPU.anchorPhi[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) - mdsInGPU.anchorZ[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) - alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) - float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) - mdsInGPU.anchorEta[mdIndex4], // outer T3 anchor hit 4 eta 
(t3_2_eta) - mdsInGPU.anchorPhi[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) - mdsInGPU.anchorZ[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) - alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) - float(modulesInGPU.layers[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) - mdsInGPU.anchorEta[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) - mdsInGPU.anchorPhi[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) - mdsInGPU.anchorZ[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) - alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) - float(modulesInGPU.layers[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) - alpaka::math::log10(acc, (innerRadius + outerRadius) * lst::k2Rinv1GeVf), // T5 pT (t5_pt) - mdsInGPU.anchorEta[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) - mdsInGPU.anchorPhi[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) - alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) - alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) - alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) - }; + // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) + float x[38] = { + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) + mdsInGPU.anchorEta[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) + mdsInGPU.anchorPhi[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) + mdsInGPU.anchorZ[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) + alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) + float(modulesInGPU.layers[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) + mdsInGPU.anchorEta[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) + mdsInGPU.anchorPhi[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) + mdsInGPU.anchorZ[mdIndex2], // inner T3 anchor 
hit 2 z (t3_2_z) + alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) + float(modulesInGPU.layers[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) + mdsInGPU.anchorEta[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) + mdsInGPU.anchorPhi[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) + mdsInGPU.anchorZ[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) + float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) + mdsInGPU.anchorEta[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) + mdsInGPU.anchorPhi[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) + mdsInGPU.anchorZ[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) + float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) + mdsInGPU.anchorEta[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) + mdsInGPU.anchorPhi[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) + mdsInGPU.anchorZ[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) + alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) + float(modulesInGPU.layers[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) + mdsInGPU.anchorEta[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) + mdsInGPU.anchorPhi[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) + mdsInGPU.anchorZ[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) + alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) + float(modulesInGPU.layers[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) + alpaka::math::log10(acc, (innerRadius + outerRadius) * k2Rinv1GeVf), // T5 pT (t5_pt) + 
mdsInGPU.anchorEta[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) + mdsInGPU.anchorPhi[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) + alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) + alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) + alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) + }; - // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b - float x_0[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_0[col] = 0; - for (unsigned int inner = 0; inner < 38; ++inner) { - x_0[col] += x[inner] * wgtT_0[inner][col]; + // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b + float x_0[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_0[col] = 0; + for (unsigned int inner = 0; inner < 38; ++inner) { + x_0[col] += x[inner] * wgtT_0[inner][col]; + } + x_0[col] += bias_0[col]; } - x_0[col] += bias_0[col]; - } - // (1): ReLU() - float x_1[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_1[col] = (x_0[col] > 0.f) ? x_0[col] : 0.f; - } + // (1): ReLU() + float x_1[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_1[col] = (x_0[col] > 0.f) ? x_0[col] : 0.f; + } - // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b - float x_2[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_2[col] = 0; - for (unsigned int inner = 0; inner < 32; ++inner) { - x_2[col] += x_1[inner] * wgtT_2[inner][col]; + // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b + float x_2[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_2[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_2[col] += x_1[inner] * wgtT_2[inner][col]; + } + x_2[col] += bias_2[col]; } - x_2[col] += bias_2[col]; - } - // (3): ReLU() - float x_3[32]; - for (unsigned int col = 0; col < 32; ++col) { - x_3[col] = (x_2[col] > 0.f) ? 
x_2[col] : 0.f; - } + // (3): ReLU() + float x_3[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_3[col] = (x_2[col] > 0.f) ? x_2[col] : 0.f; + } - // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b - float x_4[1]; - for (unsigned int col = 0; col < 1; ++col) { - x_4[col] = 0; - for (unsigned int inner = 0; inner < 32; ++inner) { - x_4[col] += x_3[inner] * wgtT_4[inner][col]; + // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b + float x_4[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_4[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_4[col] += x_3[inner] * wgtT_4[inner][col]; + } + x_4[col] += bias_4[col]; } - x_4[col] += bias_4[col]; - } - // (5): Sigmoid() - float x_5[1]; - for (unsigned int col = 0; col < 1; ++col) { - x_5[col] = alpaka::math::exp(acc, x_4[col]) / (alpaka::math::exp(acc, x_4[col]) + 1); - } + // (5): Sigmoid() + float x_5[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_5[col] = alpaka::math::exp(acc, x_4[col]) / (alpaka::math::exp(acc, x_4[col]) + 1); + } - return x_5[0]; - } + return x_5[0]; + } -} //namespace lst::t5dnn + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h index d7b2f03937bdb..d5321fea07a6e 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h @@ -3,311 +3,313 @@ #include -namespace lst::t5dnn { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + namespace t5dnn { - ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { - -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, - 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, - -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, 
-0.5780622f, - 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { - {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, - 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, - -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, - -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, - {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, - -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, - -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, - 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, - {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, - 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, - -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, - 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 0.8718957f, -1.1980486f}, - {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, - -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, - -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, - -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, - {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, - 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, 
-0.1554846f, -0.0831100f, 2.8754771f, - -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, -1.2626950f, 0.9145837f, - -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, - {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, - -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, - -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, - -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, - {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, - 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, - 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, - -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, - {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, - 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, - 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, - -1.3269740f, -3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, - {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, - 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, - 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, - -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, - {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, - 
0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, - 1.2458711f, -0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, - 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, - {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, - -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, - -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, - -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, - {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, - 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, - 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, - 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, - {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, - -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, - -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, -0.0709381f, -0.1839961f, -0.0529834f, - 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, - {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, - -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, - 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, - 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, - {0.3995822f, -0.2139665f, 0.3982936f, 
-0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, - -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, - 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, - 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, - {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, - -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, - -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, - -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, - {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, - 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, - -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, - 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, - {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, - 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, - 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, - 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, - {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, - -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, - -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, - 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 
0.6662078f, 2.0991604f}, - {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 0.2300792f, - -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, - 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, - 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, - {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, - -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, - 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, - 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, - {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, - -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, - -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, - -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, - {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, - 0.8555872f, 2.4727983f, -0.2072828f, -1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, - -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, - -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, - {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, - 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, - -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, - 0.6274695f, -1.8314874f, 
0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, - {0.0534900f, 0.1787969f, -0.0571320f, -0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, - 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, - -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, - -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, - {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, - 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, - -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, - 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, - {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, - -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, - -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, - -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, - {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 1.2631345f, - 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, - 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, - -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, - {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, - -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, - -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, 
-0.0525629f, -0.2861001f, - -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, -0.1602017f, -0.0070103f}, - {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, - 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, - 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, - -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, - {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, - -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, - 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, - -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, - {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, - 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, - 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, - 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, - {-4.8917460f, 6.7944999f, -0.2255474f, 0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, - 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, - 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, - -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, - {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, - 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, - 0.6546553f, -2.7179255f, 
-1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, - 1.3750844f, 0.0596805f, -0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, - {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, - 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, - -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, - 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, - {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, - 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, - -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, - 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, - {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, - 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, - 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, - -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, -2.7904994f}, - {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, - 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, - -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, - 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, - }; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { - 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, - 
14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, - -2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, - -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { - {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, - -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, - -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, - 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, - {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, - -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, - 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, - 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, - {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, - -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, - 0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, - 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, - {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, - 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, - 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, - -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, 
-0.1271127f, 0.1164243f}, - {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 0.0236462f, -0.0704844f, - -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, - -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, - -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, - {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, - 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, - 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, - -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, - {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, - -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, - -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, - 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, - {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, - -0.2628384f, -0.1402464f, -0.6302179f, -0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, - -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, - 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, - {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, - 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, - -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, - 
-1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, - {0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, - -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, - -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, - -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, - {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, - -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, - 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, - -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, - {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, - 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, - 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, - 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, - {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 0.5166737f, -1.0814301f, - -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, - -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, - 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, - {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, - -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, - -2.0935996f, -0.1845744f, -0.0716080f, 
-1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, - -0.9677961f, 1.8319578f, -3.1916575f, 3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, - {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, - -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, - 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, - 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, - {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, - -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, - 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, - -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, - {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, - -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, - -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, - -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, - {-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, - 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, - 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, - -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, - {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, - -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, 
-0.7453306f, -1.9909632f, - -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, -11.0296011f, -1.1292133f, -0.0710276f, - 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, - {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, - -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, - 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, - -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, - {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, - -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, - 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, - 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, - {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, - 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, - -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, - -0.1452804f, -0.0195101f, 0.1234572f, 0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, - {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, - -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, - 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, - -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, - {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, - -0.0119933f, 
-0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, - -0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, - -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, - {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, - -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, - -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, - -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, - {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, - -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, - -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, - -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, - {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, - -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, - -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, -5.3748040f, -7.4730940f, 0.1019564f, - -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, - {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, - 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, - -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, - 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, - {0.4443443f, 0.6384261f, 
1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, - 1.1106120f, -0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, - -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, - -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, - {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, - 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, - 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, - -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, - {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, - 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, - -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, - -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, - {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, - 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, -0.0518054f, - 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, - 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, - }; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; - ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { - {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, - {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, - {-0.0601061f}, {0.2086218f}, 
{-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, - {-0.3354485f}, {0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, - {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, - }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { + -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, + 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, + -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, -0.5780622f, + 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { + {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, + 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, + -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, + -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, + {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, + -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, + -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, + 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, + {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, + 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, + -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, + 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 
0.8718957f, -1.1980486f}, + {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, + -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, + -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, + -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, + {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, + 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, -0.1554846f, -0.0831100f, 2.8754771f, + -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, -1.2626950f, 0.9145837f, + -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, + {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, + -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, + -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, + -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, + {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, + 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, + 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, + -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, + {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, + 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, + 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, + -1.3269740f, 
-3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, + {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, + 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, + 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, + -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, + {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, + 0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, + 1.2458711f, -0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, + 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, + {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, + -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, + -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, + -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, + {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, + 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, + 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, + 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, + {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, + -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, + -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, 
-0.0709381f, -0.1839961f, -0.0529834f, + 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, + {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, + -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, + 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, + 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, + {0.3995822f, -0.2139665f, 0.3982936f, -0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, + -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, + 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, + 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, + {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, + -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, + -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, + -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, + {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, + 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, + -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, + 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, + {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, + 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, 
+ 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, + 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, + {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, + -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, + -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, + 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 0.6662078f, 2.0991604f}, + {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 0.2300792f, + -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, + 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, + 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, + {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, + -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, + 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, + 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, + {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, + -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, + -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, + -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, + {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, + 0.8555872f, 2.4727983f, -0.2072828f, 
-1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, + -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, + -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, + {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, + 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, + -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, + 0.6274695f, -1.8314874f, 0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, + {0.0534900f, 0.1787969f, -0.0571320f, -0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, + 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, + -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, + -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, + {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, + 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, + -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, + 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, + {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, + -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, + -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, + -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, + {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 
1.2631345f, + 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, + 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, + -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, + {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, + -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, + -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, -0.0525629f, -0.2861001f, + -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, -0.1602017f, -0.0070103f}, + {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, + 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, + 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, + -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, + {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, + -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, + 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, + -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, + {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, + 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, + 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, + 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, + {-4.8917460f, 6.7944999f, -0.2255474f, 
0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, + 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, + 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, + -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, + {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, + 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, + 0.6546553f, -2.7179255f, -1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, + 1.3750844f, 0.0596805f, -0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, + {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, + 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, + -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, + 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, + {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, + 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, + -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, + 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, + {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, + 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, + 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, + -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, 
-2.7904994f}, + {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, + 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, + -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, + 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { + 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, + 14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, + -2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, + -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { + {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, + -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, + -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, + 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, + {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, + -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, + 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, + 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, + {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, + -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, + 
0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, + 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, + {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, + 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, + 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, + -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, -0.1271127f, 0.1164243f}, + {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 0.0236462f, -0.0704844f, + -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, + -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, + -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, + {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, + 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, + 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, + -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, + {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, + -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, + -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, + 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, + {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, + -0.2628384f, -0.1402464f, -0.6302179f, 
-0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, + -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, + 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, + {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, + 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, + -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, + -1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, + {0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, + -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, + -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, + -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, + {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, + -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, + 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, + -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, + {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, + 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, + 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, + 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, + {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 
0.5166737f, -1.0814301f, + -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, + -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, + 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, + {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, + -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, + -2.0935996f, -0.1845744f, -0.0716080f, -1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, + -0.9677961f, 1.8319578f, -3.1916575f, 3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, + {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, + -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, + 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, + 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, + {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, + -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, + 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, + -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, + {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, + -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, + -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, + -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, + 
{-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, + 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, + 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, + -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, + {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, + -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, -0.7453306f, -1.9909632f, + -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, -11.0296011f, -1.1292133f, -0.0710276f, + 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, + {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, + -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, + 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, + -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, + {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, + -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, + 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, + 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, + {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, + 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, + -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, + -0.1452804f, -0.0195101f, 0.1234572f, 
0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, + {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, + -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, + 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, + -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, + {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, + -0.0119933f, -0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, + -0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, + -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, + {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, + -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, + -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, + -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, + {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, + -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, + -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, + -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, + {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, + -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, + -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, 
-5.3748040f, -7.4730940f, 0.1019564f, + -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, + {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, + 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, + -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, + 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, + {0.4443443f, 0.6384261f, 1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, + 1.1106120f, -0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, + -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, + -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, + {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, + 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, + 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, + -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, + {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, + 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, + -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, + -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, + {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, + 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, 
-0.0518054f, + 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, + 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { + {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, + {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, + {-0.0601061f}, {0.2086218f}, {-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, + {-0.3354485f}, {0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, + {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, + }; -} //namespace lst::t5dnn + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h index 0e17185104c74..81e4358ab30d6 100644 --- a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -3,7 +3,7 @@ #include "RecoTracker/LSTCore/interface/Constants.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct ObjectRanges { int* hitRanges; @@ -150,5 +150,5 @@ namespace lst { void setData(ObjectRangesBuffer& buf) { data_.setData(buf); } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index 12161acc08de0..1ecc256887c77 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -11,7 +11,7 @@ #include "Quintuplet.h" #include "PixelTriplet.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct PixelQuintuplets { unsigned int* pixelIndices; 
unsigned int* T5Indices; @@ -106,11 +106,11 @@ namespace lst { inline void setData(PixelQuintupletsBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Quintuplets const& quintupletsInGPU, - lst::PixelQuintuplets& pixelQuintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Quintuplets const& quintupletsInGPU, + PixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelIndex, unsigned int T5Index, unsigned int pixelQuintupletIndex, @@ -202,7 +202,7 @@ namespace lst { pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -210,25 +210,25 @@ namespace lst { uint16_t lowerModuleIndex5, float rzChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * 
(modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + - 6 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS); const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + - 6 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -292,7 +292,7 @@ namespace lst { return true; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -300,25 +300,25 @@ namespace lst { uint16_t lowerModuleIndex5, float rPhiChiSquared) { const int layer1 = 
modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + - 6 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS); const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + - 6 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - 
modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -401,8 +401,8 @@ namespace lst { float chiSquared = 0.f; float absArctanSlope, angleM, xPrime, yPrime, sigma2; for (size_t i = 0; i < nPoints; i++) { - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; } else if (xs[i] < 0 and ys[i] > 0) { @@ -430,7 +430,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, @@ -446,7 +446,7 @@ namespace lst { need not always be a PS strip module, but all non-anchor hits sit on strip modules. 
*/ - ModuleType moduleType; + ::lst::ModuleType moduleType; short moduleSubdet, moduleSide; float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; @@ -458,21 +458,21 @@ namespace lst { const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; //category 1 - barrel PS flat - if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide == ::lst::Center) { delta1[i] = inv1; delta2[i] = inv1; slopes[i] = -999.f; isFlat[i] = true; } //category 2 - barrel 2S - else if (moduleSubdet == Barrel and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 1.f; slopes[i] = -999.f; isFlat[i] = true; } //category 3 - barrel PS tilted - else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide != ::lst::Center) { delta1[i] = inv1; isFlat[i] = false; @@ -483,7 +483,7 @@ namespace lst { } } //category 4 - endcap PS - else if (moduleSubdet == Endcap and moduleType == PS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::PS) { delta1[i] = inv1; isFlat[i] = false; /* @@ -498,7 +498,7 @@ namespace lst { } } //category 5 - endcap 2S - else if (moduleSubdet == Endcap and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 500.f * inv1; isFlat[i] = false; @@ -516,7 +516,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t* lowerModuleIndices, float g, float f, @@ -552,7 +552,7 @@ namespace lst { return chiSquared; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(lst::Modules const& modulesInGPU, + 
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -560,25 +560,25 @@ namespace lst { uint16_t lowerModuleIndex5, float rPhiChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); const int layer4 = modulesInGPU.layers[lowerModuleIndex4] + - 6 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap 
and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS); const int layer5 = modulesInGPU.layers[lowerModuleIndex5] + - 6 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS); if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 12 and layer5 == 13) { @@ -642,14 +642,58 @@ namespace lst { return true; } + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t* lowerModuleIndices, + float* rtPix, + float* zPix, + float* rts, + float* zs) { + //use the two anchor hits of the pixel segment to compute the slope + //then compute the pseudo chi squared of the five outer hits + + float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]); + float residual = 0; + float error2 = 0; + //hardcoded array indices!!! + float RMSE = 0; + for (size_t i = 0; i < Params_T5::kLayers; i++) { + uint16_t& lowerModuleIndex = lowerModuleIndices[i]; + const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; + const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; + const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; + + residual = (moduleSubdet == ::lst::Barrel) ? (zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) + : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; + //PS Modules + if (moduleType == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //special dispensation to tilted PS modules! 
+ if (moduleType == 0 and moduleSubdet == ::lst::Barrel and moduleSide != ::lst::Center) { + error2 /= (1.f + drdz * drdz); + } + RMSE += (residual * residual) / error2; + } + + RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. + return RMSE; + } + template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, - lst::Quintuplets const& quintupletsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + Quintuplets const& quintupletsInGPU, unsigned int pixelSegmentIndex, unsigned int quintupletIndex, float& rzChiSquared, @@ -788,63 +832,19 @@ namespace lst { return true; } - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, - uint16_t* lowerModuleIndices, - float* rtPix, - float* zPix, - float* rts, - float* zs) { - //use the two anchor hits of the pixel segment to compute the slope - //then compute the pseudo chi squared of the five outer hits - - float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]); - float residual = 0; - float error2 = 0; - //hardcoded array indices!!! - float RMSE = 0; - for (size_t i = 0; i < Params_T5::kLayers; i++) { - uint16_t& lowerModuleIndex = lowerModuleIndices[i]; - const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; - const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; - const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; - - residual = (moduleSubdet == lst::Barrel) ? 
(zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) - : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; - const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; - //PS Modules - if (moduleType == 0) { - error2 = kPixelPSZpitch * kPixelPSZpitch; - } else //2S modules - { - error2 = kStrip2SZpitch * kStrip2SZpitch; - } - - //special dispensation to tilted PS modules! - if (moduleType == 0 and moduleSubdet == lst::Barrel and moduleSide != Center) { - error2 /= (1.f + drdz * drdz); - } - RMSE += (residual * residual) / error2; - } - - RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. - return RMSE; - } - struct CreatePixelQuintupletsInGPUFromMapv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::Quintuplets quintupletsInGPU, - lst::PixelQuintuplets pixelQuintupletsInGPU, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + Quintuplets quintupletsInGPU, + PixelQuintuplets pixelQuintupletsInGPU, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments, - lst::ObjectRanges rangesInGPU) const { + ObjectRanges rangesInGPU) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridBlockExtent = alpaka::getWorkDiv(acc); @@ -858,7 +858,7 @@ namespace lst { uint16_t quintupletLowerModuleIndex = modulesInGPU.connectedPixels[iLSModule]; if (quintupletLowerModuleIndex >= *modulesInGPU.nLowerModules) continue; - if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == lst::TwoS) + if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == ::lst::TwoS) continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; if (segmentsInGPU.isDup[i_pLS]) @@ -942,5 +942,5 @@ namespace lst { } // end i_pLS } }; -} // namespace lst +} // namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 0c78efcafc87f..710c760fb809f 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -11,7 +11,7 @@ #include "ObjectRanges.h" #include "Quintuplet.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // One pixel segment, one outer tracker triplet! struct PixelTriplets { unsigned int* pixelSegmentIndices; @@ -129,10 +129,10 @@ namespace lst { inline void setData(PixelTripletsBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, - lst::PixelTriplets& pixelTripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + PixelTriplets& pixelTripletsInGPU, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float pixelRadius, @@ -210,10 +210,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t pixelLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -228,8 +228,8 @@ namespace lst { unsigned int thirdMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex]; unsigned int fourthMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex + 1]; - if (outerInnerLowerModuleSubdet == lst::Barrel and - (outerOuterLowerModuleSubdet == lst::Barrel or 
outerOuterLowerModuleSubdet == lst::Endcap)) { + if (outerInnerLowerModuleSubdet == ::lst::Barrel and + (outerOuterLowerModuleSubdet == ::lst::Barrel or outerOuterLowerModuleSubdet == ::lst::Endcap)) { return runTripletDefaultAlgoPPBB(acc, modulesInGPU, rangesInGPU, @@ -244,7 +244,7 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runTripletDefaultAlgoPPEE(acc, modulesInGPU, rangesInGPU, @@ -263,23 +263,23 @@ namespace lst { return false; }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float rzChiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 
5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); if (layer1 == 8 and layer2 == 9 and layer3 == 10) { return rzChiSquared < 13.6067f; @@ -335,8 +335,8 @@ namespace lst { float chiSquared = 0.f; float absArctanSlope, angleM, xPrime, yPrime, sigma2; for (size_t i = 0; i < nPoints; i++) { - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; } else if (xs[i] < 0 and ys[i] > 0) { @@ -366,7 +366,7 @@ namespace lst { //TODO: merge this one and the pT5 function later into a single function template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, uint16_t* lowerModuleIndices, float g, float f, @@ -379,33 +379,33 @@ namespace lst { float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; for (size_t i = 0; i < 3; i++) { - ModuleType moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; + ::lst::ModuleType moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; short moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; short moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; float drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; //category 1 - barrel PS flat - if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide == ::lst::Center) { 
delta1[i] = inv1; delta2[i] = inv1; slopes[i] = -999; isFlat[i] = true; } //category 2 - barrel 2S - else if (moduleSubdet == Barrel and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::TwoS) { delta1[i] = 1; delta2[i] = 1; slopes[i] = -999; isFlat[i] = true; } //category 3 - barrel PS tilted - else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide != ::lst::Center) { delta1[i] = inv1; isFlat[i] = false; delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); } //category 4 - endcap PS - else if (moduleSubdet == Endcap and moduleType == PS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::PS) { delta1[i] = inv1; isFlat[i] = false; @@ -416,7 +416,7 @@ namespace lst { delta2[i] = inv2; } //category 5 - endcap 2S - else if (moduleSubdet == Endcap and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::TwoS) { delta1[i] = 1; delta2[i] = 500 * inv1; isFlat[i] = false; @@ -447,23 +447,23 @@ namespace lst { }; //90pc threshold - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float chiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * 
(modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); if (layer1 == 8 and layer2 == 9 and layer3 == 10) { return chiSquared < 7.003f; @@ -494,23 +494,23 @@ namespace lst { return true; }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, float chiSquared) { const int layer1 = modulesInGPU.layers[lowerModuleIndex1] + - 6 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS); const int layer2 = modulesInGPU.layers[lowerModuleIndex2] + - 6 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS); + 6 * 
(modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS); const int layer3 = modulesInGPU.layers[lowerModuleIndex3] + - 6 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap) + - 5 * (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS); + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS); if (layer1 == 7 and layer2 == 8 and layer3 == 9) // endcap layer 1,2,3, ps { @@ -663,18 +663,18 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterion(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, float pixelRadius, float pixelRadiusError, float tripletRadius, int16_t lowerModuleIndex, uint16_t middleModuleIndex, uint16_t upperModuleIndex) { - if (modulesInGPU.subdets[lowerModuleIndex] == lst::Endcap) { + if (modulesInGPU.subdets[lowerModuleIndex] == ::lst::Endcap) { return passRadiusCriterionEEE(acc, pixelRadius, pixelRadiusError, tripletRadius); - } else if (modulesInGPU.subdets[middleModuleIndex] == lst::Endcap) { + } else if (modulesInGPU.subdets[middleModuleIndex] == ::lst::Endcap) { return passRadiusCriterionBEE(acc, pixelRadius, pixelRadiusError, tripletRadius); - } else if (modulesInGPU.subdets[upperModuleIndex] == lst::Endcap) { + } else if (modulesInGPU.subdets[upperModuleIndex] == ::lst::Endcap) { return passRadiusCriterionBBE(acc, pixelRadius, pixelRadiusError, tripletRadius); } else { return passRadiusCriterionBBB(acc, pixelRadius, pixelRadiusError, tripletRadius); @@ -683,7 +683,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RZChiSquared(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, const 
uint16_t* lowerModuleIndices, const float* rtPix, const float* xPix, @@ -724,14 +724,14 @@ namespace lst { float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); float rou = a / p; - if (moduleSubdet == lst::Endcap) { + if (moduleSubdet == ::lst::Endcap) { float s = (zsi - z1) * p / Pz; float x = x1 + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s)); float y = y1 + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s)); diffr = alpaka::math::abs(acc, rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100; } - if (moduleSubdet == lst::Barrel) { + if (moduleSubdet == ::lst::Barrel) { float paraA = r1 * r1 + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y1 * Px - x1 * Py) / a - rtsi * rtsi; float paraB = 2 * (x1 * Px + y1 * Py) / a; float paraC = 2 * (y1 * Px - x1 * Py) / a + 2 * (Px * Px + Py * Py) / (a * a); @@ -747,7 +747,7 @@ namespace lst { diffz = alpaka::math::min(acc, diffz1, diffz2); } - residual = moduleSubdet == lst::Barrel ? diffz : diffr; + residual = moduleSubdet == ::lst::Barrel ? diffz : diffr; //PS Modules if (moduleType == 0) { @@ -758,7 +758,7 @@ namespace lst { } //special dispensation to tilted PS modules! 
- if (moduleType == 0 and moduleSubdet == lst::Barrel and moduleSide != Center) { + if (moduleType == 0 and moduleSubdet == ::lst::Barrel and moduleSide != ::lst::Center) { float drdz = modulesInGPU.drdzs[lowerModuleIndex]; error2 /= (1 + drdz * drdz); } @@ -772,11 +772,11 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets const& tripletsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, unsigned int pixelSegmentIndex, unsigned int tripletIndex, float& pixelRadius, @@ -928,12 +928,12 @@ namespace lst { struct CreatePixelTripletsInGPUFromMapv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::PixelTriplets pixelTripletsInGPU, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + PixelTriplets pixelTripletsInGPU, unsigned int* connectedPixelSize, unsigned int* connectedPixelIndex, unsigned int nPixelSegments) const { @@ -959,7 +959,7 @@ namespace lst { } #endif //Removes 2S-2S :FIXME: filter these out in the pixel map - if (modulesInGPU.moduleType[tripletLowerModuleIndex] == lst::TwoS) + if (modulesInGPU.moduleType[tripletLowerModuleIndex] == ::lst::TwoS) continue; uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; @@ -990,7 +990,7 @@ namespace lst { outerTripletArrayIndex += gridThreadExtent[2]) { unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[tripletLowerModuleIndex] + outerTripletArrayIndex; - if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * 
outerTripletIndex + 1]] == lst::TwoS) + if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]] == ::lst::TwoS) continue; //REMOVES PS-2S if (tripletsInGPU.partOfPT5[outerTripletIndex]) @@ -1076,33 +1076,30 @@ namespace lst { betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaOut); return; } if (betaIn * betaOut > 0.f and - (alpaka::math::abs(acc, pt_beta) < 4.f * lst::kPt_betaMax or + (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < - 8.f * lst::kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap { const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = - betaOut + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaOut); //FIXME: need a faster version + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); //FIXME: need a faster version betaAv = 0.5f * (betaInUpd + betaOutUpd); //1st update @@ -1111,69 +1108,65 @@ namespace lst { betaIn += alpaka::math::copysign( 
acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaOut); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && - alpaka::math::abs(acc, pt_beta) < 12.f * lst::kPt_betaMax) //use betaIn sign as ref + alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax) //use betaIn sign as ref { const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = betaOut + alpaka::math::copysign( acc, alpaka::math::asin( acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaAv = (alpaka::math::abs(acc, 
betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) ? (0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; //1st update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate betaIn += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1185,7 +1178,7 @@ namespace lst { unsigned int fourthMDIndex) { float dPhi, betaIn, betaOut, pt_beta, zLo, zHi, zLoPointed, zHiPointed, dPhiCut, betaOutCut; - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == 
::lst::PS); float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; float rt_InUp = mdsInGPU.anchorRt[secondMDIndex]; @@ -1207,7 +1200,7 @@ namespace lst { float rt_InOut = rt_InUp; - if (alpaka::math::abs(acc, lst::deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > 0.5f * float(M_PI)) + if (alpaka::math::abs(acc, deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > 0.5f * float(M_PI)) return false; unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; @@ -1281,7 +1274,7 @@ namespace lst { float diffX = x_OutLo - x_InLo; float diffY = y_OutLo - y_InLo; - dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; @@ -1291,11 +1284,11 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == lst::Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS; + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == ::lst::Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = lst::deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -1309,42 +1302,42 @@ namespace lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - betaIn = -lst::deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + lst::deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + betaOut = 
-alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); - alpha_OutUp_lowEdge = lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_highEdge = deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; - betaOutRHmin = -alpha_OutUp_highEdge + lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - tl_axis_highEdge_x, - tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - tl_axis_lowEdge_x, - tl_axis_lowEdge_y); + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + 
mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); } //beta computation @@ -1378,7 +1371,7 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_ptBetaMax = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); const float alphaInAbsReg = alpaka::math::max(acc, @@ -1428,10 +1421,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::ObjectRanges const& rangesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t pixelModuleIndex, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -1443,7 +1436,7 @@ namespace lst { unsigned int fourthMDIndex) { float dPhi, betaIn, betaOut, pt_beta, rtLo, rtHi, dPhiCut, betaOutCut; - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float z_InUp = mdsInGPU.anchorZ[secondMDIndex]; float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; @@ -1487,7 +1480,7 @@ namespace lst { const float dzDrtScale = alpaka::math::tan(acc, slope) / slope; //FIXME: need approximate value const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS; + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS; const float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch @@ -1544,7 +1537,7 @@ namespace lst { float diffX = x_OutLo - x_InLo; float diffY = y_OutLo - y_InLo; - dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) @@ -1553,12 +1546,12 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == lst::Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS; + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == ::lst::Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = lst::deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -1571,41 +1564,41 @@ namespace lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - betaIn = -lst::deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + lst::deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); - alpha_OutUp_lowEdge = lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - 
mdsInGPU.anchorLowEdgeY[fourthMDIndex], - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_highEdge = deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; - betaOutRHmin = -alpha_OutUp_highEdge + lst::deltaPhi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex], - tl_axis_highEdge_x, - tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + lst::deltaPhi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex], - tl_axis_lowEdge_x, - tl_axis_lowEdge_y); + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); } //beta computation @@ -1637,7 +1630,7 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_ptBetaMax = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = 
thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); const float alphaInAbsReg = @@ -1690,5 +1683,5 @@ namespace lst { return dBeta * dBeta <= dBetaCut2; } -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 6b602a426a889..4ff67d66d2844 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -14,7 +14,7 @@ #include "ObjectRanges.h" #include "Triplet.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Quintuplets { unsigned int* tripletIndices; uint16_t* lowerModuleIndices; @@ -149,8 +149,8 @@ namespace lst { return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(lst::Triplets const& tripletsInGPU, - lst::Quintuplets& quintupletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(Triplets const& tripletsInGPU, + Quintuplets& quintupletsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex, uint16_t lowerModule1, @@ -232,7 +232,7 @@ namespace lst { } //90% constraint - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(Modules const& modulesInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -317,8 +317,8 @@ namespace lst { //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, unsigned int firstMDIndex, unsigned int secondMDIndex, unsigned int thirdMDIndex, @@ -528,7 +528,7 @@ namespace lst { 
continue; } - // calculation is copied from PixelTriplet.cc lst::computePT3RZChiSquared + // calculation is copied from PixelTriplet.cc computePT3RZChiSquared float diffr = 0, diffz = 0; float rou = a / p; @@ -586,14 +586,14 @@ namespace lst { subdets = modulesInGPU.subdets[lowerModuleIndex3]; } if (i == 2 || i == 3) { - residual = (layeri <= 6 && ((side == lst::Center) or (drdz < 1))) ? diffz : diffr; + residual = (layeri <= 6 && ((side == ::lst::Center) or (drdz < 1))) ? diffz : diffr; float projection_missing2 = 1.f; if (drdz < 1) - projection_missing2 = ((subdets == lst::Endcap) or (side == lst::Center)) + projection_missing2 = ((subdets == ::lst::Endcap) or (side == ::lst::Center)) ? 1.f : 1.f / (1 + drdz * drdz); // cos(atan(drdz)), if dr/dz<1 if (drdz > 1) - projection_missing2 = ((subdets == lst::Endcap) or (side == lst::Center)) + projection_missing2 = ((subdets == ::lst::Endcap) or (side == ::lst::Center)) ? 1.f : (drdz * drdz) / (1 + drdz * drdz); //sin(atan(drdz)), if dr/dz>1 error2 = error2 * projection_missing2; @@ -751,8 +751,8 @@ namespace lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(lst::Triplets const& tripletsInGPU, - lst::Segments const& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(Triplets const& tripletsInGPU, + Segments const& segmentsInGPU, unsigned int innerTripletIndex, unsigned int outerTripletIndex) { unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; @@ -778,7 +778,7 @@ namespace lst { //brute force float candidateRadius; float g, f; - minimumRadius = lst::lst_INF; + minimumRadius = lst_INF; maximumRadius = 0.f; for (size_t i = 0; i < 3; i++) { float x1 = x1Vec[i]; @@ -1012,7 +1012,7 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, - lst::Modules const& modulesInGPU, + Modules const& modulesInGPU, const uint16_t* lowerModuleIndices, float* delta1, float* delta2, @@ -1029,7 
+1029,7 @@ namespace lst { modules. */ - ModuleType moduleType; + ::lst::ModuleType moduleType; short moduleSubdet, moduleSide; float inv1 = kWidthPS / kWidth2S; float inv2 = kPixelPSZpitch / kWidth2S; @@ -1041,21 +1041,21 @@ namespace lst { const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; //category 1 - barrel PS flat - if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide == ::lst::Center) { delta1[i] = inv1; delta2[i] = inv1; slopes[i] = -999.f; isFlat[i] = true; } //category 2 - barrel 2S - else if (moduleSubdet == Barrel and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 1.f; slopes[i] = -999.f; isFlat[i] = true; } //category 3 - barrel PS tilted - else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + else if (moduleSubdet == ::lst::Barrel and moduleType == ::lst::PS and moduleSide != ::lst::Center) { delta1[i] = inv1; isFlat[i] = false; @@ -1066,7 +1066,7 @@ namespace lst { } } //category 4 - endcap PS - else if (moduleSubdet == Endcap and moduleType == PS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::PS) { delta1[i] = inv1; isFlat[i] = false; @@ -1082,7 +1082,7 @@ namespace lst { } } //category 5 - endcap 2S - else if (moduleSubdet == Endcap and moduleType == TwoS) { + else if (moduleSubdet == ::lst::Endcap and moduleType == ::lst::TwoS) { delta1[i] = 1.f; delta2[i] = 500.f * inv1; isFlat[i] = false; @@ -1130,8 +1130,8 @@ namespace lst { // Computing sigmas is a very tricky affair // if the module is tilted or endcap, we need to use the slopes properly! - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; @@ -1213,8 +1213,8 @@ namespace lst { float chiSquared = 0.f; float absArctanSlope, angleM, xPrime, yPrime, sigma2; for (size_t i = 0; i < nPoints; i++) { - absArctanSlope = ((slopes[i] != lst::lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) - : 0.5f * float(M_PI)); + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); if (xs[i] > 0 and ys[i] > 0) { angleM = 0.5f * float(M_PI) - absArctanSlope; } else if (xs[i] < 0 and ys[i] > 0) { @@ -1255,33 +1255,30 @@ namespace lst { betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaOut); return; } if (betaIn * betaOut > 0.f and - (alpaka::math::abs(acc, pt_beta) < 4.f * lst::kPt_betaMax or + (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < - 8.f * lst::kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap { const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = - betaOut + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, 
sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), - betaOut); //FIXME: need a faster version + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); //FIXME: need a faster version betaAv = 0.5f * (betaInUpd + betaOutUpd); //1st update @@ -1290,68 +1287,64 @@ namespace lst { betaIn += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf * pt_beta_inv, lst::kSinAlphaMax)), + alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), betaOut); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && - alpaka::math::abs(acc, pt_beta) < 12.f * lst::kPt_betaMax) //use betaIn sign as ref + alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax) //use betaIn sign as ref { const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); const float betaInUpd = - betaIn + alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, - alpaka::math::min( - acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / 
alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version const float betaOutUpd = betaOut + alpaka::math::copysign( acc, alpaka::math::asin( acc, - alpaka::math::min( - acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), lst::kSinAlphaMax)), + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) ? (0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; //1st update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate betaIn += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdIn_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version betaOut += alpaka::math::copysign( acc, alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * lst::k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), lst::kSinAlphaMax)), + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), betaIn); //FIXME: need a faster version //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update - pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate } } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, 
uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1362,8 +1355,8 @@ namespace lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; @@ -1374,17 +1367,17 @@ namespace lst { float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); - float zHi = z_InLo + (z_InLo + lst::kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + + float zHi = z_InLo + (z_InLo + kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + (zpitch_InLo + zpitch_OutLo); - float zLo = z_InLo + (z_InLo - lst::kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 1.f : dzDrtScale) - + float zLo = z_InLo + (z_InLo - kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 
1.f : dzDrtScale) - (zpitch_InLo + zpitch_OutLo); //Cut 1 - z compatibility @@ -1402,7 +1395,7 @@ namespace lst { float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (r3_InLo / rt_InLo); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; dzErr += muls2 * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; dzErr = alpaka::math::sqrt(acc, dzErr); @@ -1410,7 +1403,7 @@ namespace lst { const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; const float zWindow = dzErr / drt_InSeg * drt_OutLo_InLo + - (zpitch_InLo + zpitch_OutLo); //FIXME for lst::ptCut lower than ~0.8 need to add curv path correction + (zpitch_InLo + zpitch_OutLo); //FIXME for ptCut lower than ~0.8 need to add curv path correction float zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; float zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ? 
1.f : dzDrtScale) + zWindow; @@ -1421,7 +1414,7 @@ namespace lst { float pvOffset = 0.1f / rt_OutLo; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); // Cut #3: FIXME:deltaPhiPos can be tighter if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; @@ -1431,7 +1424,7 @@ namespace lst { float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #4: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) @@ -1442,16 +1435,16 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); - bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == lst::Endcap and - modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS; + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == ::lst::Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorPhi[fourthMDIndex]); + alpha_OutUp = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorPhi[fourthMDIndex]); alpha_OutUp_highEdge = alpha_OutUp; 
alpha_OutUp_lowEdge = alpha_OutUp; @@ -1463,42 +1456,38 @@ namespace lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - float betaIn = - alpha_InLo - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - float betaOut = - -alpha_OutUp + lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = - lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); - alpha_OutUp_lowEdge = - lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + alpha_OutUp_highEdge = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + alpha_OutUp_lowEdge = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; 
tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaOutRHmin = -alpha_OutUp_highEdge + lst::phi_mpi_pi(acc, - lst::phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); - betaOutRHmax = -alpha_OutUp_lowEdge + lst::phi_mpi_pi(acc, - lst::phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + betaOutRHmin = + -alpha_OutUp_highEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + betaOutRHmax = + -alpha_OutUp_lowEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); } //beta computation @@ -1514,9 +1503,7 @@ namespace lst { (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); float betaInCut = alpaka::math::asin( - acc, - alpaka::math::min( - acc, (-rt_InSeg * corrF + drt_tl_axis) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / drt_InSeg); //Cut #5: first beta cut @@ -1524,7 +1511,7 @@ namespace lst { return false; float betaAv = 0.5f * (betaIn + betaOut); - float pt_beta = drt_tl_axis * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + float pt_beta = drt_tl_axis * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); int lIn = 5; int lOut = isEC_lastLayer ? 
11 : 5; float sdOut_dr = alpaka::math::sqrt(acc, @@ -1534,7 +1521,7 @@ namespace lst { (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - lst::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) @@ -1548,19 +1535,19 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_maxPtBeta = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confimm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confimm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, alpha_InLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float alphaOutAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, alpha_OutLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * lst::kDeltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * lst::kDeltaZLum / z_OutLo); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); const float sinDPhi = alpaka::math::sin(acc, dPhi); @@ -1580,8 +1567,7 @@ namespace lst { const float dBetaROut2 = dBetaROut * dBetaROut; float betaOutCut = - alpaka::math::asin(acc, - alpaka::math::min(acc, drt_tl_axis * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut @@ -1601,9 +1587,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1614,8 +1600,8 @@ namespace lst { unsigned int secondMDIndex, unsigned int thirdMDIndex, unsigned int fourthMDIndex) { - bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS); + bool isPS_InLo = 
(modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS); float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; @@ -1626,21 +1612,21 @@ namespace lst { float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitch_InLo = (isPS_InLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitch_OutLo = (isPS_OutLo ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); float zGeom = zpitch_InLo + zpitch_OutLo; // Cut #0: Preliminary (Only here in endcap case) if (z_InLo * z_OutLo <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS; - float rtGeom1 = isOutSgInnerMDPS ? lst::kPixelPSZpitch : lst::kStrip2SZpitch; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS; + float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InLo); float rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - rtGeom1; //slope correction only on the lower end @@ -1669,12 +1655,12 @@ namespace lst { const float coshEta = dr3SDIn / drtSDIn; //direction estimate const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); - const float zGeom1_another = lst::kPixelPSZpitch; + const float zGeom1_another = kPixelPSZpitch; float kZ = (z_OutLo - z_InLo) / dzSDIn; float drtErr = zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (rIn / rt_InLo); - const float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; drtErr = alpaka::math::sqrt(acc, drtErr); @@ -1685,7 +1671,7 @@ namespace lst { const float pvOffset = 0.1f / rt_OutLo; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); //Cut #4: deltaPhiPos can be tighter if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) @@ -1696,7 +1682,7 @@ namespace lst { float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) return 
false; @@ -1706,33 +1692,32 @@ namespace lst { float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; - float sdOut_alphaOut = lst::phi_mpi_pi(acc, - lst::phi(acc, - mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], - mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - - mdsInGPU.anchorPhi[fourthMDIndex]); + float sdOut_alphaOut = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorPhi[fourthMDIndex]); - float sdOut_alphaOut_min = lst::phi_mpi_pi( + float sdOut_alphaOut_min = phi_mpi_pi( acc, __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex])); - float sdOut_alphaOut_max = lst::phi_mpi_pi( + float sdOut_alphaOut_max = phi_mpi_pi( acc, __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex])); float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float betaIn = - sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; float betaOut = - -sdOut_alphaOut + lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; - bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == lst::Endcap) and - (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == lst::TwoS); + bool isEC_secondLayer = 
(modulesInGPU.subdets[innerOuterLowerModuleIndex] == ::lst::Endcap) and + (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == ::lst::TwoS); if (isEC_secondLayer) { betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; @@ -1765,8 +1750,7 @@ namespace lst { float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); const float corrF = 1.f; float betaInCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #6: first beta cut @@ -1774,7 +1758,7 @@ namespace lst { return false; float betaAv = 0.5f * (betaIn + betaOut); - float pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); float lIn = 5; float lOut = 11; @@ -1786,7 +1770,7 @@ namespace lst { (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - lst::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? 
(2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) @@ -1800,25 +1784,25 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_maxPtBeta = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdIn_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float alphaOutAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdOut_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * lst::kDeltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * lst::kDeltaZLum / z_OutLo); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); const float sinDPhi = alpaka::math::sin(acc, dPhi); const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; - if (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::TwoS) { + if (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::TwoS) { dBetaROut = (alpaka::math::sqrt(acc, mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + @@ -1830,9 +1814,8 @@ namespace lst { } const float dBetaROut2 = dBetaROut * dBetaROut; - float betaOutCut = - alpaka::math::asin(acc, alpaka::math::min(acc, dr * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + - (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) @@ -1851,9 +1834,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -1873,7 +1856,7 @@ namespace lst { float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly @@ -1882,13 +1865,13 @@ 
namespace lst { if ((z_InLo * z_OutLo) <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, z_InLo); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == lst::PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == ::lst::PS; + bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS; - float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * lst::kPixelPSZpitch - : (isInSgInnerMDPS or isOutSgInnerMDPS) ? lst::kPixelPSZpitch + lst::kStrip2SZpitch - : 2.f * lst::kStrip2SZpitch; + float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgInnerMDPS) ? kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; float dz = z_OutLo - z_InLo; float rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end @@ -1902,7 +1885,7 @@ namespace lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == lst::PS; + bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == ::lst::PS; const float drtSDIn = rt_InOut - rt_InLo; const float dzSDIn = z_InOut - z_InLo; @@ -1915,12 +1898,12 @@ namespace lst { float kZ = (z_OutLo - z_InLo) / dzSDIn; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; - float drtErr = alpaka::math::sqrt( - acc, - lst::kPixelPSZpitch * lst::kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + - muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * 
kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); float rtWindow = drtErr + rtGeom; @@ -1939,7 +1922,7 @@ namespace lst { float pvOffset = 0.1f / rtOut; float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); - float deltaPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) return false; @@ -1949,7 +1932,7 @@ namespace lst { float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float dPhi = lst::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) @@ -1957,21 +1940,20 @@ namespace lst { float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float sdOut_alpha = sdIn_alpha; //weird - float sdOut_dPhiPos = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[thirdMDIndex]); + float sdOut_dPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[thirdMDIndex]); float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]); float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]); - float sdOut_alphaOutRHmin = lst::phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); - float sdOut_alphaOutRHmax = lst::phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); - float sdOut_alphaOut = lst::phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); + float sdOut_alphaOutRHmin 
= phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); + float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); + float sdOut_alphaOut = phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - float betaIn = - sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); @@ -1979,7 +1961,7 @@ namespace lst { float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; float betaOut = - -sdOut_alphaOut + lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; @@ -2006,8 +1988,7 @@ namespace lst { float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); const float corrF = 1.f; float betaInCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #6: first beta cut @@ -2015,7 +1996,7 @@ namespace lst { return false; float betaAv = 0.5f * (betaIn + betaOut); - float pt_beta = dr * lst::k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); int lIn = 11; //endcap int lOut = 13; //endcap @@ 
-2027,7 +2008,7 @@ namespace lst { (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; - lst::runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) @@ -2041,27 +2022,26 @@ namespace lst { betaOutRHmax *= betaOutMMSF; float min_ptBeta_maxPtBeta = alpaka::math::min( - acc, alpaka::math::abs(acc, pt_beta), lst::kPt_betaMax); //need to confirm the range-out value of 7 GeV + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdIn_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float alphaOutAbsReg = alpaka::math::max( - acc, - alpaka::math::abs(acc, sdOut_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * lst::k2Rinv1GeVf / 3.0f, lst::kSinAlphaMax))); - const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * lst::kDeltaZLum / z_InLo); - const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * lst::kDeltaZLum / z_OutLo); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 
0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); const float dBetaRIn2 = 0; // TODO-RH float dBetaROut2 = 0; //TODO-RH - float betaOutCut = - alpaka::math::asin(acc, alpaka::math::min(acc, dr * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + - (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) @@ -2080,9 +2060,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t innerOuterLowerModuleIndex, uint16_t outerInnerLowerModuleIndex, @@ -2098,8 +2078,8 @@ namespace lst { short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; - if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Barrel and - outerInnerLowerModuleSubdet == lst::Barrel and outerOuterLowerModuleSubdet == lst::Barrel) { + if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Barrel and + outerInnerLowerModuleSubdet == ::lst::Barrel and outerOuterLowerModuleSubdet == ::lst::Barrel) { return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU, mdsInGPU, @@ -2114,8 +2094,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else 
if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Barrel and - outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Barrel and + outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU, mdsInGPU, @@ -2130,8 +2110,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Barrel and - outerInnerLowerModuleSubdet == lst::Barrel and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Barrel and + outerInnerLowerModuleSubdet == ::lst::Barrel and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoBBBB(acc, modulesInGPU, mdsInGPU, @@ -2146,8 +2126,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and innerOuterLowerModuleSubdet == lst::Endcap and - outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and innerOuterLowerModuleSubdet == ::lst::Endcap and + outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoBBEE(acc, modulesInGPU, mdsInGPU, @@ -2162,8 +2142,8 @@ namespace lst { secondMDIndex, thirdMDIndex, fourthMDIndex); - } else if (innerInnerLowerModuleSubdet == lst::Endcap and innerOuterLowerModuleSubdet == lst::Endcap and - outerInnerLowerModuleSubdet == lst::Endcap and outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Endcap and innerOuterLowerModuleSubdet == ::lst::Endcap and + 
outerInnerLowerModuleSubdet == ::lst::Endcap and outerOuterLowerModuleSubdet == ::lst::Endcap) { return runQuintupletDefaultAlgoEEEE(acc, modulesInGPU, mdsInGPU, @@ -2185,10 +2165,10 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, - lst::Modules& modulesInGPU, - lst::MiniDoublets& mdsInGPU, - lst::Segments& segmentsInGPU, - lst::Triplets& tripletsInGPU, + Modules& modulesInGPU, + MiniDoublets& mdsInGPU, + Segments& segmentsInGPU, + Triplets& tripletsInGPU, uint16_t lowerModuleIndex1, uint16_t lowerModuleIndex2, uint16_t lowerModuleIndex3, @@ -2278,24 +2258,24 @@ namespace lst { float x3Vec[] = {x3, x3, x3}; float y3Vec[] = {y3, y3, y3}; - if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex1] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex1] == ::lst::TwoS) { x1Vec[1] = mdsInGPU.anchorLowEdgeX[firstMDIndex]; x1Vec[2] = mdsInGPU.anchorHighEdgeX[firstMDIndex]; y1Vec[1] = mdsInGPU.anchorLowEdgeY[firstMDIndex]; y1Vec[2] = mdsInGPU.anchorHighEdgeY[firstMDIndex]; } - if (modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex2] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex2] == ::lst::TwoS) { x2Vec[1] = mdsInGPU.anchorLowEdgeX[secondMDIndex]; x2Vec[2] = mdsInGPU.anchorHighEdgeX[secondMDIndex]; y2Vec[1] = mdsInGPU.anchorLowEdgeY[secondMDIndex]; y2Vec[2] = mdsInGPU.anchorHighEdgeY[secondMDIndex]; } - if (modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex3] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex3] == ::lst::TwoS) { x3Vec[1] = mdsInGPU.anchorLowEdgeX[thirdMDIndex]; x3Vec[2] = mdsInGPU.anchorHighEdgeX[thirdMDIndex]; @@ -2310,8 
+2290,8 @@ namespace lst { x1Vec[i] = x4; y1Vec[i] = y4; } - if (modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex4] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex4] == ::lst::TwoS) { x1Vec[1] = mdsInGPU.anchorLowEdgeX[fourthMDIndex]; x1Vec[2] = mdsInGPU.anchorHighEdgeX[fourthMDIndex]; @@ -2326,8 +2306,8 @@ namespace lst { x2Vec[i] = x5; y2Vec[i] = y5; } - if (modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap and - modulesInGPU.moduleType[lowerModuleIndex5] == lst::TwoS) { + if (modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap and + modulesInGPU.moduleType[lowerModuleIndex5] == ::lst::TwoS) { x2Vec[1] = mdsInGPU.anchorLowEdgeX[fifthMDIndex]; x2Vec[2] = mdsInGPU.anchorHighEdgeX[fifthMDIndex]; @@ -2376,23 +2356,23 @@ namespace lst { //split by category bool matchedRadii; - if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Barrel) { + if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Barrel) { matchedRadii = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + } else if (modulesInGPU.subdets[lowerModuleIndex1] == 
::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { matchedRadii = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius); - } else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + } else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { if (modulesInGPU.layers[lowerModuleIndex1] == 1) { matchedRadii = matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); @@ -2405,17 +2385,17 @@ namespace lst { } } - else if (modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { matchedRadii = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); - } else if 
(modulesInGPU.subdets[lowerModuleIndex1] == lst::Barrel and - modulesInGPU.subdets[lowerModuleIndex2] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex3] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex4] == lst::Endcap and - modulesInGPU.subdets[lowerModuleIndex5] == lst::Endcap) { + } else if (modulesInGPU.subdets[lowerModuleIndex1] == ::lst::Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex3] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex4] == ::lst::Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == ::lst::Endcap) { matchedRadii = matchRadiiBEEEE(acc, innerRadius, bridgeRadius, @@ -2464,22 +2444,22 @@ namespace lst { #ifdef USE_T5_DNN unsigned int mdIndices[] = {firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex}; - float inference = lst::t5dnn::runInference(acc, - modulesInGPU, - mdsInGPU, - segmentsInGPU, - tripletsInGPU, - xVec, - yVec, - mdIndices, - lowerModuleIndices, - innerTripletIndex, - outerTripletIndex, - innerRadius, - outerRadius, - bridgeRadius); - TightCutFlag = TightCutFlag and (inference > lst::t5dnn::kLSTWp2); // T5-in-TC cut - if (inference <= lst::t5dnn::kLSTWp2) // T5-building cut + float inference = t5dnn::runInference(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + xVec, + yVec, + mdIndices, + lowerModuleIndices, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius); + TightCutFlag = TightCutFlag and (inference > t5dnn::kLSTWp2); // T5-in-TC cut + if (inference <= t5dnn::kLSTWp2) // T5-building cut return false; #endif @@ -2537,12 +2517,12 @@ namespace lst { struct CreateQuintupletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU, + Modules modulesInGPU, + 
MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU, uint16_t nEligibleT5Modules) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -2624,7 +2604,7 @@ namespace lst { float eta = mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]]; - float pt = (innerRadius + outerRadius) * lst::k2Rinv1GeVf; + float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; float scores = chiSquared + nonAnchorChiSquared; addQuintupletToMemory(tripletsInGPU, quintupletsInGPU, @@ -2666,9 +2646,9 @@ namespace lst { struct CreateEligibleModulesListForQuintupletsGPU { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Triplets tripletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -2698,9 +2678,9 @@ namespace lst { if (tripletsInGPU.nTriplets[i] == 0) continue; - if (module_subdets == lst::Barrel and module_layers >= 3) + if (module_subdets == ::lst::Barrel and module_layers >= 3) continue; - if (module_subdets == lst::Endcap and module_layers > 1) + if (module_subdets == ::lst::Endcap and module_layers > 1) continue; int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{}); @@ -2770,9 +2750,9 @@ namespace lst { struct AddQuintupletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Quintuplets quintupletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block 
static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -2792,5 +2772,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 2c9634fd34373..b74de58f3c233 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -11,7 +11,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Segments { FPX* dPhis; FPX* dPhiMins; @@ -176,7 +176,7 @@ namespace lst { inline void setData(SegmentsBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(lst::Modules const& modulesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" @@ -186,18 +186,20 @@ namespace lst { short side = modulesInGPU.sides[moduleIndex]; short rod = modulesInGPU.rods[moduleIndex]; - return (subdet == Barrel) && (((side != Center) && (layer == 3)) || - ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || - ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + return (subdet == ::lst::Barrel) && + (((side != ::lst::Center) && (layer == 3)) || + ((side == ::lst::NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == ::lst::PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); } ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(short subdet, short layer, short side, short rod) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as 
what was previously considered as"isNormalTiltedModules" // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf - return (subdet == Barrel) && (((side != Center) && (layer == 3)) || - ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || - ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + return (subdet == ::lst::Barrel) && + (((side != ::lst::Center) && (layer == 3)) || + ((side == ::lst::NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == ::lst::PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); } ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { @@ -216,11 +218,11 @@ namespace lst { float moduleSeparation = 0; - if (subdet == Barrel and side == Center) { + if (subdet == ::lst::Barrel and side == ::lst::Center) { moduleSeparation = miniDeltaFlat[iL]; } else if (isTighterTiltedModules_seg(subdet, layer, side, rod)) { moduleSeparation = miniDeltaTilted[iL]; - } else if (subdet == Endcap) { + } else if (subdet == ::lst::Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; } else //Loose tilted modules { @@ -230,7 +232,7 @@ namespace lst { return moduleSeparation; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(lst::Modules const& modulesInGPU, unsigned int moduleIndex) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; @@ -248,11 +250,11 @@ namespace lst { float moduleSeparation = 0; - if (subdet == Barrel and side == Center) { + if (subdet == ::lst::Barrel and side == ::lst::Center) { moduleSeparation = miniDeltaFlat[iL]; } else if 
(isTighterTiltedModules_seg(modulesInGPU, moduleIndex)) { moduleSeparation = miniDeltaTilted[iL]; - } else if (subdet == Endcap) { + } else if (subdet == ::lst::Endcap) { moduleSeparation = miniDeltaEndcap[iL][iR]; } else //Loose tilted modules { @@ -265,8 +267,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, float* dAlphaThresholdValues, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, float xIn, float yIn, float zIn, @@ -279,7 +281,7 @@ namespace lst { uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex) { - float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel) + float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel) ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; @@ -289,15 +291,15 @@ namespace lst { const float dAlpha_Bfield = alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - bool isInnerTilted = modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[innerLowerModuleIndex] != lst::Center; - bool isOuterTilted = modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[outerLowerModuleIndex] != lst::Center; + bool isInnerTilted = modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[innerLowerModuleIndex] != ::lst::Center; + bool isOuterTilted = modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[outerLowerModuleIndex] != ::lst::Center; float drdzInner = modulesInGPU.drdzs[innerLowerModuleIndex]; float drdzOuter = modulesInGPU.drdzs[outerLowerModuleIndex]; - float innerModuleGapSize = lst::moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); - float 
outerModuleGapSize = lst::moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); + float innerModuleGapSize = moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); + float outerModuleGapSize = moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); const float innerminiTilt2 = isInnerTilted ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) @@ -313,14 +315,14 @@ namespace lst { float sdLumForInnerMini2; float sdLumForOuterMini2; - if (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel) { + if (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel) { sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForInnerMini2 = (mdsInGPU.dphis[innerMDIndex] * mdsInGPU.dphis[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / (mdsInGPU.dzs[innerMDIndex] * mdsInGPU.dzs[innerMDIndex]); } - if (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel) { + if (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel) { sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForOuterMini2 = (mdsInGPU.dphis[outerMDIndex] * mdsInGPU.dphis[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / @@ -330,23 +332,23 @@ namespace lst { // Unique stuff for the segment dudes alone float dAlpha_res_inner = 0.02f / miniDelta * - (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); float dAlpha_res_outer = 0.02f / miniDelta * - (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); + (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel ? 
1.0f : alpaka::math::abs(acc, zOut) / rtOut); float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; - if (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[innerLowerModuleIndex] == lst::Center) { + if (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[innerLowerModuleIndex] == ::lst::Center) { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); } - if (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel and - modulesInGPU.sides[outerLowerModuleIndex] == lst::Center) { + if (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.sides[outerLowerModuleIndex] == ::lst::Center) { dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[1] = @@ -357,7 +359,7 @@ namespace lst { dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(lst::Segments& segmentsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments& segmentsInGPU, unsigned int lowerMDIndex, unsigned int upperMDIndex, uint16_t innerLowerModuleIndex, @@ -388,8 +390,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, - lst::Segments& segmentsInGPU, - lst::MiniDoublets const& mdsInGPU, + Segments& segmentsInGPU, + MiniDoublets const& mdsInGPU, unsigned int innerMDIndex, unsigned int outerMDIndex, uint16_t pixelModuleIndex, @@ -427,7 +429,7 @@ namespace lst { mdsInGPU.anchorY[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; //check which of the circles can accommodate r3LH better (we won't get perfect agreement) - float 
bestChiSquared = lst::lst_INF; + float bestChiSquared = lst_INF; float chiSquared; size_t bestIndex; for (size_t i = 0; i < 2; i++) { @@ -451,8 +453,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -463,7 +465,7 @@ namespace lst { float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax) { - float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel) + float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel) ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; @@ -494,12 +496,12 @@ namespace lst { float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); - dPhi = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); if (alpaka::math::abs(acc, dPhi) > sdCut) return false; - dPhiChange = lst::phi_mpi_pi(acc, lst::phi(acc, xOut - xIn, yOut - yIn) - mdsInGPU.anchorPhi[innerMDIndex]); + dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mdsInGPU.anchorPhi[innerMDIndex]); if (alpaka::math::abs(acc, dPhiChange) > sdCut) return false; @@ -541,8 +543,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -565,8 +567,8 @@ namespace lst { zOut = mdsInGPU.anchorZ[outerMDIndex]; 
rtOut = mdsInGPU.anchorRt[outerMDIndex]; - bool outerLayerEndcapTwoS = (modulesInGPU.subdets[outerLowerModuleIndex] == lst::Endcap) && - (modulesInGPU.moduleType[outerLowerModuleIndex] == lst::TwoS); + bool outerLayerEndcapTwoS = (modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Endcap) && + (modulesInGPU.moduleType[outerLowerModuleIndex] == ::lst::TwoS); float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float disks2SMinRadius = 60.f; @@ -594,14 +596,12 @@ namespace lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - dPhi = lst::phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); float sdCut = sdSlope; if (outerLayerEndcapTwoS) { - float dPhiPos_high = - lst::phi_mpi_pi(acc, mdsInGPU.anchorHighEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); - float dPhiPos_low = - lst::phi_mpi_pi(acc, mdsInGPU.anchorLowEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + float dPhiPos_high = phi_mpi_pi(acc, mdsInGPU.anchorHighEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + float dPhiPos_low = phi_mpi_pi(acc, mdsInGPU.anchorLowEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); dPhiMax = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_high : dPhiPos_low; dPhiMin = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? 
dPhiPos_low : dPhiPos_high; @@ -657,8 +657,8 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, uint16_t innerLowerModuleIndex, uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, @@ -669,8 +669,8 @@ namespace lst { float& dPhiChange, float& dPhiChangeMin, float& dPhiChangeMax) { - if (modulesInGPU.subdets[innerLowerModuleIndex] == lst::Barrel and - modulesInGPU.subdets[outerLowerModuleIndex] == lst::Barrel) { + if (modulesInGPU.subdets[innerLowerModuleIndex] == ::lst::Barrel and + modulesInGPU.subdets[outerLowerModuleIndex] == ::lst::Barrel) { return runSegmentDefaultAlgoBarrel(acc, modulesInGPU, mdsInGPU, @@ -704,10 +704,10 @@ namespace lst { struct CreateSegmentsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { auto const globalBlockIdx = alpaka::getIdx(acc); auto const blockThreadIdx = alpaka::getIdx(acc); auto const gridBlockExtent = alpaka::getWorkDiv(acc); @@ -798,9 +798,9 @@ namespace lst { struct CreateSegmentArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::MiniDoublets mdsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + MiniDoublets mdsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -902,9 +902,9 @@ namespace lst { struct AddSegmentRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Segments segmentsInGPU, - 
lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -927,11 +927,11 @@ namespace lst { struct AddPixelSegmentToEventKernel { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::Hits hitsInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + Hits hitsInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, unsigned int* hitIndices0, unsigned int* hitIndices1, unsigned int* hitIndices2, @@ -1009,6 +1009,6 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 2a35542afc5fc..16f36df3257cd 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -12,7 +12,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct TrackCandidates { short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS unsigned int* directObjectIndices; // Will hold direct indices to each type containers @@ -108,7 +108,7 @@ namespace lst { inline void setData(TrackCandidatesBuffer& buf) { data_.setData(buf); } }; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(lst::TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, unsigned int trackletIndex, unsigned int trackCandidateIndex, uint4 hitIndices, @@ -127,7 +127,7 @@ namespace lst { trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE 
void addTrackCandidateToMemory(lst::TrackCandidates& trackCandidatesInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, short trackCandidateType, unsigned int innerTrackletIndex, unsigned int outerTrackletIndex, @@ -166,9 +166,9 @@ namespace lst { ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, unsigned int jx, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Hits const& hitsInGPU) { + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Hits const& hitsInGPU) { int phits1[Params_pLS::kHits]; int phits2[Params_pLS::kHits]; @@ -207,11 +207,11 @@ namespace lst { struct CrossCleanpT3 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::PixelTriplets pixelTripletsInGPU, - lst::Segments segmentsInGPU, - lst::PixelQuintuplets pixelQuintupletsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + PixelTriplets pixelTripletsInGPU, + Segments segmentsInGPU, + PixelQuintuplets pixelQuintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -235,7 +235,7 @@ namespace lst { float eta2 = segmentsInGPU.eta[pLS_jx - prefix]; float phi2 = segmentsInGPU.phi[pLS_jx - prefix]; float dEta = alpaka::math::abs(acc, (eta1 - eta2)); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-5f) @@ -248,11 +248,11 @@ namespace lst { struct CrossCleanT5 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Quintuplets quintupletsInGPU, - lst::PixelQuintuplets pixelQuintupletsInGPU, - lst::PixelTriplets pixelTripletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + PixelQuintuplets pixelQuintupletsInGPU, + PixelTriplets 
pixelTripletsInGPU, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -288,7 +288,7 @@ namespace lst { } float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) @@ -303,14 +303,14 @@ namespace lst { struct CrossCleanpLS { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::PixelTriplets pixelTripletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, - lst::MiniDoublets mdsInGPU, - lst::Hits hitsInGPU, - lst::Quintuplets quintupletsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + PixelTriplets pixelTripletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + MiniDoublets mdsInGPU, + Hits hitsInGPU, + Quintuplets quintupletsInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -336,7 +336,7 @@ namespace lst { float eta2 = __H2F(quintupletsInGPU.eta[quintupletIndex]); float phi2 = __H2F(quintupletsInGPU.phi[quintupletIndex]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 1e-3f) @@ -353,7 +353,7 @@ namespace lst { float eta2 = __H2F(pixelTripletsInGPU.eta_pix[pT3Index]); float phi2 = __H2F(pixelTripletsInGPU.phi_pix[pT3Index]); float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) @@ -370,7 +370,7 @@ namespace lst { float eta2 = segmentsInGPU.eta[pLSIndex - prefix]; float phi2 = segmentsInGPU.phi[pLSIndex - prefix]; float dEta = 
alpaka::math::abs(acc, eta1 - eta2); - float dPhi = lst::calculate_dPhi(phi1, phi2); + float dPhi = calculate_dPhi(phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) @@ -385,10 +385,10 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::PixelTriplets pixelTripletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + PixelTriplets pixelTripletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -441,9 +441,9 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::Quintuplets quintupletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::ObjectRanges rangesInGPU) const { + Quintuplets quintupletsInGPU, + TrackCandidates trackCandidatesInGPU, + ObjectRanges rangesInGPU) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -495,8 +495,8 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, bool tc_pls_triplets) const { auto const globalThreadIdx = alpaka::getIdx(acc); auto const gridThreadExtent = alpaka::getWorkDiv(acc); @@ -534,10 +534,10 @@ namespace lst { template ALPAKA_FN_ACC void operator()(TAcc const& acc, uint16_t nLowerModules, - lst::PixelQuintuplets pixelQuintupletsInGPU, - lst::TrackCandidates trackCandidatesInGPU, - lst::Segments segmentsInGPU, - lst::ObjectRanges rangesInGPU) const { + PixelQuintuplets pixelQuintupletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + ObjectRanges 
rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -586,5 +586,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 1f909bdc02d77..c5ac8bda543d8 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -11,7 +11,7 @@ #include "Hit.h" #include "ObjectRanges.h" -namespace lst { +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { struct Triplets { unsigned int* segmentIndices; uint16_t* lowerModuleIndices; //3 of them @@ -136,10 +136,10 @@ namespace lst { }; #ifdef CUT_VALUE_DEBUG - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets& tripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets& tripletsInGPU, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, uint16_t innerInnerLowerModuleIndex, @@ -154,10 +154,10 @@ namespace lst { float circleCenterY, unsigned int tripletIndex) #else - ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, - lst::Triplets& tripletsInGPU, + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets& tripletsInGPU, unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, uint16_t innerInnerLowerModuleIndex, @@ -206,9 +206,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, - lst::Modules const& modulesInGPU, - 
lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -266,9 +266,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -280,8 +280,8 @@ namespace lst { unsigned int innerSegmentIndex, float& betaIn, float& betaInCut) { - bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS); + bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS); float rtIn = mdsInGPU.anchorRt[firstMDIndex]; float rtMid = mdsInGPU.anchorRt[secondMDIndex]; @@ -291,17 +291,16 @@ namespace lst { float zMid = mdsInGPU.anchorZ[secondMDIndex]; zOut = mdsInGPU.anchorZ[thirdMDIndex]; - float alpha1GeVOut = - alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + float alpha1GeVOut = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float rtRatio_OutIn = rtOut / rtIn; // Outer segment beginning rt divided by inner segment beginning rt; float dzDrtScale = alpaka::math::tan(acc, alpha1GeVOut) / alpha1GeVOut; // The track can bend in r-z plane slightly - float zpitchIn = (isPSIn ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitchOut = (isPSOut ? 
lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); const float zHi = - zIn + (zIn + lst::kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); - const float zLo = zIn + (zIn - lst::kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + zIn + (zIn + kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); + const float zLo = zIn + (zIn - kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - (zpitchIn + zpitchOut); //slope-correction only on outer end //Cut 1 - z compatibility @@ -320,15 +319,14 @@ namespace lst { float dzErr = (zpitchIn + zpitchOut) * (zpitchIn + zpitchOut) * 2.f; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f) * (r3In / rtIn); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; dzErr += muls2 * drt_OutIn * drt_OutIn / 3.f * coshEta * coshEta; dzErr = alpaka::math::sqrt(acc, dzErr); // Constructing upper and lower bound const float dzMean = dz_InSeg / drt_InSeg * drt_OutIn; - const float zWindow = - dzErr / drt_InSeg * drt_OutIn + - (zpitchIn + zpitchOut); //FIXME for lst::ptCut lower than ~0.8 need to add curv path correction + const float zWindow = dzErr / drt_InSeg * drt_OutIn + + (zpitchIn + zpitchOut); //FIXME for ptCut lower than ~0.8 need to add curv path correction const float zLoPointed = zIn + dzMean * (zIn > 0.f ? 1.f : dzDrtScale) - zWindow; const float zHiPointed = zIn + dzMean * (zIn < 0.f ? 
1.f : dzDrtScale) + zWindow; @@ -342,7 +340,7 @@ namespace lst { float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = alpha_InLo - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); //beta computation float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); @@ -355,8 +353,7 @@ namespace lst { (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); betaInCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / drt_InSeg); //Cut #3: first beta cut @@ -365,9 +362,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -381,8 +378,8 @@ namespace lst { unsigned int outerSegmentIndex, float& betaIn, float& betaInCut) { - bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS); - bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS); + bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS); + bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS); 
float rtIn = mdsInGPU.anchorRt[firstMDIndex]; float rtMid = mdsInGPU.anchorRt[secondMDIndex]; @@ -392,22 +389,21 @@ namespace lst { float zMid = mdsInGPU.anchorZ[secondMDIndex]; zOut = mdsInGPU.anchorZ[thirdMDIndex]; - float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly - float zpitchIn = (isPSIn ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); - float zpitchOut = (isPSOut ? lst::kPixelPSZpitch : lst::kStrip2SZpitch); + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); float zGeom = zpitchIn + zpitchOut; // Cut #0: Preliminary (Only here in endcap case) if (zIn * zOut <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, zIn); - bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS; - float rtGeom1 = isOutSgInnerMDPS ? lst::kPixelPSZpitch : lst::kStrip2SZpitch; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS; + float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; float zGeom1 = alpaka::math::copysign(acc, zGeom, zIn); float rtLo = rtIn * (1.f + (zOut - zIn - zGeom1) / (zIn + zGeom1 + dLum) / dzDrtScale) - rtGeom1; //slope correction only on the lower end @@ -433,12 +429,12 @@ namespace lst { const float coshEta = dr3SDIn / drtSDIn; //direction estimate const float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); - const float zGeom1_another = lst::kPixelPSZpitch; + const float zGeom1_another = kPixelPSZpitch; const float kZ = (zOut - zIn) / dzSDIn; float drtErr = zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2 * (rtOut - rtIn) / 50.f) * (rIn / rtIn); - const float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; drtErr = alpaka::math::sqrt(acc, drtErr); @@ -455,7 +451,7 @@ namespace lst { float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float betaInRHmin = betaIn; float betaInRHmax = betaIn; @@ -476,8 +472,7 @@ namespace lst { float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - betaInCut = alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #4: first 
beta cut @@ -486,9 +481,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -509,8 +504,7 @@ namespace lst { float zMid = mdsInGPU.anchorZ[secondMDIndex]; zOut = mdsInGPU.anchorZ[thirdMDIndex]; - float alpha1GeV_Out = - alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)); + float alpha1GeV_Out = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); float dzDrtScale = alpaka::math::tan(acc, alpha1GeV_Out) / alpha1GeV_Out; // The track can bend in r-z plane slightly @@ -519,13 +513,13 @@ namespace lst { if (zIn * zOut <= 0) return false; - float dLum = alpaka::math::copysign(acc, lst::kDeltaZLum, zIn); - bool isOutSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS; - bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == lst::PS; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS; + bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == ::lst::PS; - float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * lst::kPixelPSZpitch - : (isInSgInnerMDPS or isOutSgOuterMDPS) ? lst::kPixelPSZpitch + lst::kStrip2SZpitch - : 2.f * lst::kStrip2SZpitch; + float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgOuterMDPS) ? 
kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; float dz = zOut - zIn; const float rtLo = rtIn * (1.f + dz / (zIn + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end @@ -535,7 +529,7 @@ namespace lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; - bool isInSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == lst::PS; + bool isInSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == ::lst::PS; float drtSDIn = rtMid - rtIn; float dzSDIn = zMid - zIn; @@ -549,12 +543,12 @@ namespace lst { float kZ = (zOut - zIn) / dzSDIn; float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f); - float muls2 = thetaMuls2 * 9.f / (lst::ptCut * lst::ptCut) * 16.f; + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; - float drtErr = alpaka::math::sqrt( - acc, - lst::kPixelPSZpitch * lst::kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + - muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); float rtWindow = drtErr + rtGeom; @@ -577,7 +571,7 @@ namespace lst { float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; - betaIn = sdIn_alpha - lst::phi_mpi_pi(acc, lst::phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); @@ -599,8 +593,7 @@ namespace lst { float sdIn_d = rt_InOut - rt_InLo; float dr = alpaka::math::sqrt(acc, 
tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); - betaInCut = alpaka::math::asin( - acc, alpaka::math::min(acc, (-sdIn_dr + dr) * lst::k2Rinv1GeVf / lst::ptCut, lst::kSinAlphaMax)) + + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / sdIn_d); //Cut #4: first beta cut @@ -609,9 +602,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -629,8 +622,8 @@ namespace lst { short middleLowerModuleSubdet = modulesInGPU.subdets[middleLowerModuleIndex]; short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; - if (innerInnerLowerModuleSubdet == lst::Barrel and middleLowerModuleSubdet == lst::Barrel and - outerOuterLowerModuleSubdet == lst::Barrel) { + if (innerInnerLowerModuleSubdet == ::lst::Barrel and middleLowerModuleSubdet == ::lst::Barrel and + outerOuterLowerModuleSubdet == ::lst::Barrel) { return passPointingConstraintBBB(acc, modulesInGPU, mdsInGPU, @@ -646,8 +639,8 @@ namespace lst { innerSegmentIndex, betaIn, betaInCut); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and middleLowerModuleSubdet == lst::Barrel and - outerOuterLowerModuleSubdet == lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and middleLowerModuleSubdet == ::lst::Barrel and + outerOuterLowerModuleSubdet == ::lst::Endcap) { return passPointingConstraintBBE(acc, modulesInGPU, mdsInGPU, @@ -665,8 +658,8 @@ namespace lst { outerSegmentIndex, betaIn, betaInCut); - } else if (innerInnerLowerModuleSubdet == lst::Barrel and middleLowerModuleSubdet == lst::Endcap and - outerOuterLowerModuleSubdet == 
lst::Endcap) { + } else if (innerInnerLowerModuleSubdet == ::lst::Barrel and middleLowerModuleSubdet == ::lst::Endcap and + outerOuterLowerModuleSubdet == ::lst::Endcap) { return passPointingConstraintBBE(acc, modulesInGPU, mdsInGPU, @@ -687,8 +680,8 @@ namespace lst { } - else if (innerInnerLowerModuleSubdet == lst::Endcap and middleLowerModuleSubdet == lst::Endcap and - outerOuterLowerModuleSubdet == lst::Endcap) { + else if (innerInnerLowerModuleSubdet == ::lst::Endcap and middleLowerModuleSubdet == ::lst::Endcap and + outerOuterLowerModuleSubdet == ::lst::Endcap) { return passPointingConstraintEEE(acc, modulesInGPU, mdsInGPU, @@ -744,9 +737,9 @@ namespace lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, - lst::Modules const& modulesInGPU, - lst::MiniDoublets const& mdsInGPU, - lst::Segments const& segmentsInGPU, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, @@ -811,11 +804,11 @@ namespace lst { struct CreateTripletsInGPUv2 { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::MiniDoublets mdsInGPU, - lst::Segments segmentsInGPU, - lst::Triplets tripletsInGPU, - lst::ObjectRanges rangesInGPU, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU, uint16_t* index_gpu, uint16_t nonZeroModules) const { auto const globalThreadIdx = alpaka::getIdx(acc); @@ -928,9 +921,9 @@ namespace lst { struct CreateTripletArrayRanges { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::ObjectRanges rangesInGPU, - lst::Segments segmentsInGPU) const { + Modules modulesInGPU, + ObjectRanges rangesInGPU, + Segments segmentsInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); 
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1031,9 +1024,9 @@ namespace lst { struct AddTripletRangesToEventExplicit { template ALPAKA_FN_ACC void operator()(TAcc const& acc, - lst::Modules modulesInGPU, - lst::Triplets tripletsInGPU, - lst::ObjectRanges rangesInGPU) const { + Modules modulesInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU) const { // implementation is 1D with a single block static_assert(std::is_same_v, "Should be Acc1D"); ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); @@ -1052,5 +1045,5 @@ namespace lst { } } }; -} // namespace lst +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc index bf513865ffbed..eb48917952a38 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc +++ b/RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc @@ -1,13 +1,15 @@ #include "AccessHelper.h" +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + // =============== // ----* Hit *---- // =============== //____________________________________________________________________________________________ std::tuple, std::vector> convertHitsToHitIdxsAndHitTypes( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, std::vector hits) { - lst::Hits const* hitsEvt = event->getHits()->data(); + Event* event, std::vector hits) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hitidxs; std::vector hittypes; for (auto& hit : hits) { @@ -25,11 +27,11 @@ std::tuple, std::vector> convertHitsToHi // =============== //____________________________________________________________________________________________ -std::vector getPixelHitsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::ObjectRanges const* rangesEvt = 
event->getRanges()->data(); - lst::Modules const* modulesEvt = event->getModules()->data(); +std::vector getPixelHitsFrompLS(Event* event, unsigned int pLS) { + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + ObjectRanges const* rangesEvt = event->getRanges()->data(); + ::lst::Modules const* modulesEvt = event->getModules()->data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; unsigned int MD_1 = segments->mdIndices[2 * (pLS + pLS_offset)]; unsigned int MD_2 = segments->mdIndices[2 * (pLS + pLS_offset) + 1]; @@ -44,8 +46,8 @@ std::vector getPixelHitsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst: } //____________________________________________________________________________________________ -std::vector getPixelHitIdxsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getPixelHitIdxsFrompLS(Event* event, unsigned int pLS) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hitidxs; for (auto& hit : hits) @@ -54,15 +56,15 @@ std::vector getPixelHitIdxsFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::l } //____________________________________________________________________________________________ -std::vector getPixelHitTypesFrompLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pLS) { +std::vector getPixelHitTypesFrompLS(Event* event, unsigned int pLS) { std::vector hits = getPixelHitsFrompLS(event, pLS); std::vector hittypes(hits.size(), 0); return hittypes; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pLS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompLS(Event* event, + unsigned pLS) { return 
convertHitsToHitIdxsAndHitTypes(event, getPixelHitsFrompLS(event, pLS)); } @@ -71,16 +73,16 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getHitsFromMD(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); +std::vector getHitsFromMD(Event* event, unsigned int MD) { + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); unsigned int hit_1 = miniDoublets->anchorHitIndices[MD]; unsigned int hit_2 = miniDoublets->outerHitIndices[MD]; return {hit_1, hit_2}; } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromMD( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned MD) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromMD(Event* event, + unsigned MD) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromMD(event, MD)); } @@ -89,15 +91,15 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getMDsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { - lst::Segments const* segments = event->getSegments()->data(); +std::vector getMDsFromLS(Event* event, unsigned int LS) { + Segments const* segments = event->getSegments()->data(); unsigned int MD_1 = segments->mdIndices[2 * LS]; unsigned int MD_2 = segments->mdIndices[2 * LS + 1]; return {MD_1, MD_2}; } //____________________________________________________________________________________________ -std::vector getHitsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int LS) { +std::vector getHitsFromLS(Event* event, unsigned int LS) { std::vector MDs = getMDsFromLS(event, LS); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector 
hits_1 = getHitsFromMD(event, MDs[1]); @@ -105,8 +107,8 @@ std::vector getHitsFromLS(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromLS( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned LS) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromLS(Event* event, + unsigned LS) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromLS(event, LS)); } @@ -115,15 +117,15 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { - lst::Triplets const* triplets = event->getTriplets()->data(); +std::vector getLSsFromT3(Event* event, unsigned int T3) { + Triplets const* triplets = event->getTriplets()->data(); unsigned int LS_1 = triplets->segmentIndices[2 * T3]; unsigned int LS_2 = triplets->segmentIndices[2 * T3 + 1]; return {LS_1, LS_2}; } //____________________________________________________________________________________________ -std::vector getMDsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { +std::vector getMDsFromT3(Event* event, unsigned int T3) { std::vector LSs = getLSsFromT3(event, T3); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -131,7 +133,7 @@ std::vector getMDsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::vector getHitsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T3) { +std::vector getHitsFromT3(Event* event, unsigned int T3) { std::vector MDs = getMDsFromT3(event, T3); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -140,8 +142,8 @@ std::vector 
getHitsFromT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromT3( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT3(Event* event, + unsigned T3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT3(event, T3)); } @@ -150,15 +152,15 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getT3sFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); +std::vector getT3sFromT5(Event* event, unsigned int T5) { + Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T3_1 = quintuplets->tripletIndices[2 * T5]; unsigned int T3_2 = quintuplets->tripletIndices[2 * T5 + 1]; return {T3_1, T3_2}; } //____________________________________________________________________________________________ -std::vector getLSsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getLSsFromT5(Event* event, unsigned int T5) { std::vector T3s = getT3sFromT5(event, T5); std::vector LSs_0 = getLSsFromT3(event, T3s[0]); std::vector LSs_1 = getLSsFromT3(event, T3s[1]); @@ -166,7 +168,7 @@ std::vector getLSsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::vector getMDsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getMDsFromT5(Event* event, unsigned int T5) { std::vector LSs = getLSsFromT5(event, T5); std::vector MDs_0 = getMDsFromLS(event, LSs[0]); std::vector MDs_1 = getMDsFromLS(event, LSs[1]); @@ -176,7 +178,7 @@ std::vector getMDsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } 
//____________________________________________________________________________________________ -std::vector getHitsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getHitsFromT5(Event* event, unsigned int T5) { std::vector MDs = getMDsFromT5(event, T5); std::vector hits_0 = getHitsFromMD(event, MDs[0]); std::vector hits_1 = getHitsFromMD(event, MDs[1]); @@ -187,8 +189,8 @@ std::vector getHitsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event } //____________________________________________________________________________________________ -std::vector getHitIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getHitIdxsFromT5(Event* event, unsigned int T5) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFromT5(event, T5); std::vector hitidxs; for (auto& hit : hits) @@ -196,24 +198,24 @@ std::vector getHitIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Ev return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getModuleIdxsFromT5(Event* event, unsigned int T5) { std::vector hits = getHitsFromT5(event, T5); std::vector module_idxs; - lst::Hits const* hitsEvt = event->getHits()->data(); + Hits const* hitsEvt = event->getHits()->data(); for (auto& hitIdx : hits) { module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); } return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFromT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int T5) { +std::vector getHitTypesFromT5(Event* event, unsigned int T5) { return {4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; ; } //____________________________________________________________________________________________ -std::tuple, 
std::vector> getHitIdxsAndHitTypesFromT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned T5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromT5(Event* event, + unsigned T5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFromT5(event, T5)); } @@ -222,47 +224,46 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { - lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); - lst::ObjectRanges const* rangesEvt = event->getRanges()->data(); - lst::Modules const* modulesEvt = event->getModules()->data(); +unsigned int getPixelLSFrompT3(Event* event, unsigned int pT3) { + PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); + ObjectRanges const* rangesEvt = event->getRanges()->data(); + ::lst::Modules const* modulesEvt = event->getModules()->data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; return pixelTriplets->pixelSegmentIndices[pT3] - pLS_offset; } //____________________________________________________________________________________________ -unsigned int getT3FrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { - lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); +unsigned int getT3FrompT3(Event* event, unsigned int pT3) { + PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); return pixelTriplets->tripletIndices[pT3]; } //____________________________________________________________________________________________ -std::vector getLSsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getLSsFrompT3(Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getLSsFromT3(event, T3); } 
//____________________________________________________________________________________________ -std::vector getMDsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getMDsFrompT3(Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getMDsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, - unsigned int pT3) { +std::vector getOuterTrackerHitsFrompT3(Event* event, unsigned int pT3) { unsigned int T3 = getT3FrompT3(event, pT3); return getHitsFromT3(event, T3); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getPixelHitsFrompT3(Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getHitsFrompT3(Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); unsigned int T3 = getT3FrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -272,8 +273,8 @@ std::vector getHitsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Even } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getHitIdxsFrompT3(Event* event, unsigned int pT3) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT3(event, pT3); std::vector hitidxs; for (auto& hit : hits) @@ 
-281,17 +282,17 @@ std::vector getHitIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::E return hitidxs; } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getModuleIdxsFrompT3(Event* event, unsigned int pT3) { std::vector hits = getOuterTrackerHitsFrompT3(event, pT3); std::vector module_idxs; - lst::Hits const* hitsEvt = event->getHits()->data(); + Hits const* hitsEvt = event->getHits()->data(); for (auto& hitIdx : hits) { module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); } return module_idxs; } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT3) { +std::vector getHitTypesFrompT3(Event* event, unsigned int pT3) { unsigned int pLS = getPixelLSFrompT3(event, pT3); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -302,8 +303,8 @@ std::vector getHitTypesFrompT3(ALPAKA_ACCELERATOR_NAMESPACE::lst:: } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT3) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT3(Event* event, + unsigned pT3) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT3(event, pT3)); } @@ -312,53 +313,52 @@ std::tuple, std::vector> getHitIdxsAndHi // =============== //____________________________________________________________________________________________ -unsigned int getPixelLSFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { - lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); - lst::ObjectRanges const* rangesEvt = 
event->getRanges()->data(); - lst::Modules const* modulesEvt = event->getModules()->data(); +unsigned int getPixelLSFrompT5(Event* event, unsigned int pT5) { + PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); + ObjectRanges const* rangesEvt = event->getRanges()->data(); + ::lst::Modules const* modulesEvt = event->getModules()->data(); const unsigned int pLS_offset = rangesEvt->segmentModuleIndices[*(modulesEvt->nLowerModules)]; return pixelQuintuplets->pixelIndices[pT5] - pLS_offset; } //____________________________________________________________________________________________ -unsigned int getT5FrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { - lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); +unsigned int getT5FrompT5(Event* event, unsigned int pT5) { + PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); return pixelQuintuplets->T5Indices[pT5]; } //____________________________________________________________________________________________ -std::vector getT3sFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getT3sFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getT3sFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getLSsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getLSsFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getLSsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getMDsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getMDsFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getMDsFromT5(event, T5); } 
//____________________________________________________________________________________________ -std::vector getOuterTrackerHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, - unsigned int pT5) { +std::vector getOuterTrackerHitsFrompT5(Event* event, unsigned int pT5) { unsigned int T5 = getT5FrompT5(event, pT5); return getHitsFromT5(event, T5); } //____________________________________________________________________________________________ -std::vector getPixelHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getPixelHitsFrompT5(Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); return getPixelHitsFrompLS(event, pLS); } //____________________________________________________________________________________________ -std::vector getHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getHitsFrompT5(Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); unsigned int T5 = getT5FrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); @@ -368,8 +368,8 @@ std::vector getHitsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Even } //____________________________________________________________________________________________ -std::vector getHitIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { - lst::Hits const* hitsEvt = event->getHits()->data(); +std::vector getHitIdxsFrompT5(Event* event, unsigned int pT5) { + Hits const* hitsEvt = event->getHits()->data(); std::vector hits = getHitsFrompT5(event, pT5); std::vector hitidxs; for (auto& hit : hits) @@ -378,10 +378,10 @@ std::vector getHitIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::E } //____________________________________________________________________________________________ -std::vector getModuleIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getModuleIdxsFrompT5(Event* 
event, unsigned int pT5) { std::vector hits = getOuterTrackerHitsFrompT5(event, pT5); std::vector module_idxs; - lst::Hits const* hitsEvt = event->getHits()->data(); + Hits const* hitsEvt = event->getHits()->data(); for (auto& hitIdx : hits) { module_idxs.push_back(hitsEvt->moduleIndices[hitIdx]); } @@ -389,7 +389,7 @@ std::vector getModuleIdxsFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst } //____________________________________________________________________________________________ -std::vector getHitTypesFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int pT5) { +std::vector getHitTypesFrompT5(Event* event, unsigned int pT5) { unsigned int pLS = getPixelLSFrompT5(event, pT5); std::vector pixelHits = getPixelHitsFrompLS(event, pLS); // pixel Hits list will be either 3 or 4 and depending on it return accordingly @@ -400,8 +400,8 @@ std::vector getHitTypesFrompT5(ALPAKA_ACCELERATOR_NAMESPACE::lst:: } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned pT5) { +std::tuple, std::vector> getHitIdxsAndHitTypesFrompT5(Event* event, + unsigned pT5) { return convertHitsToHitIdxsAndHitTypes(event, getHitsFrompT5(event, pT5)); } @@ -410,9 +410,9 @@ std::tuple, std::vector> getHitIdxsAndHi // ============== //____________________________________________________________________________________________ -std::vector getLSsFromTC(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int TC) { +std::vector getLSsFromTC(Event* event, unsigned int TC) { // Get the type of the track candidate - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; unsigned int objidx = trackCandidates->directObjectIndices[TC]; switch (type) { @@ -432,10 +432,10 @@ 
std::vector getLSsFromTC(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //____________________________________________________________________________________________ -std::tuple, std::vector> getHitIdxsAndHitTypesFromTC( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned TC) { +std::tuple, std::vector> getHitIdxsAndHitTypesFromTC(Event* event, + unsigned TC) { // Get the type of the track candidate - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[TC]; unsigned int objidx = trackCandidates->directObjectIndices[TC]; switch (type) { diff --git a/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h b/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h index 8608bc95ed2fa..ce7ce3824849e 100644 --- a/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h +++ b/RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h @@ -100,7 +100,7 @@ class AnalysisConfig { std::map>> moduleSimHits; std::map modulePopulation; - lst::ModuleConnectionMap moduleConnectiongMapLoose; + ::lst::ModuleConnectionMap moduleConnectiongMapLoose; // Boolean to trigger whether to run cut_value_ntupling bool do_cut_value_ntuple; diff --git a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc index 73b5daabbfc1a..3841affaaf059 100644 --- a/RecoTracker/LSTCore/standalone/code/core/trkCore.cc +++ b/RecoTracker/LSTCore/standalone/code/core/trkCore.cc @@ -20,7 +20,7 @@ bool goodEvent() { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, int evt) { +float runMiniDoublet(LSTEvent *event, int evt) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco 
Mini-Doublet start " << evt << std::endl; @@ -73,7 +73,7 @@ float runMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, int evt) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runSegment(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Segment start" << std::endl; @@ -111,7 +111,7 @@ float runSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runT3(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco T3 start" << std::endl; @@ -153,7 +153,7 @@ float runT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runpT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runpT3(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Triplet pT3 start" << std::endl; @@ -170,7 +170,7 @@ float runpT3(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runQuintuplet(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Quintuplet start" << std::endl; @@ -216,7 +216,7 @@ float 
runQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelLineSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean) { +float runPixelLineSegment(LSTEvent *event, bool no_pls_dupclean) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Line Segment start" << std::endl; @@ -231,7 +231,7 @@ float runPixelLineSegment(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runPixelQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { +float runPixelQuintuplet(LSTEvent *event) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco Pixel Quintuplet start" << std::endl; @@ -248,7 +248,7 @@ float runPixelQuintuplet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event) { } //___________________________________________________________________________________________________________________________________________________________________________________________ -float runTrackCandidate(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, bool no_pls_dupclean, bool tc_pls_triplets) { +float runTrackCandidate(LSTEvent *event, bool no_pls_dupclean, bool tc_pls_triplets) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Reco TrackCandidate start" << std::endl; @@ -845,7 +845,7 @@ void addInputsToLineSegmentTrackingPreLoad(std::vector> &out_ } //___________________________________________________________________________________________________________________________________________________________________________________________ -float addInputsToEventPreLoad(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event *event, +float 
addInputsToEventPreLoad(LSTEvent *event, bool useOMP, std::vector trkX, std::vector trkY, @@ -1150,7 +1150,7 @@ void writeMetaData() { // DEPRECATED FUNCTIONS //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTracking(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event, bool useOMP) { +[[deprecated]] float addInputsToLineSegmentTracking(LSTEvent &event, bool useOMP) { TStopwatch my_timer; if (ana.verbose >= 2) std::cout << "Loading Inputs (i.e. outer tracker hits, and pixel line segements) to the Line Segment Tracking.... " @@ -1346,6 +1346,6 @@ void writeMetaData() { } //__________________________________________________________________________________________ -[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event &event) { +[[deprecated]] float addInputsToLineSegmentTrackingUsingExplicitMemory(LSTEvent &event) { return addInputsToLineSegmentTracking(event, true); } diff --git a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc index 911a34f519a6d..7c330a768a175 100644 --- a/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc +++ b/RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc @@ -1,5 +1,7 @@ #include "write_lst_ntuple.h" +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + //________________________________________________________________________________________________________________________________ void createOutputBranches() { createRequiredOutputBranches(); @@ -7,7 +9,7 @@ void createOutputBranches() { } //________________________________________________________________________________________________________________________________ -void fillOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void fillOutputBranches(Event* event) { setOutputBranches(event); setOptionalOutputBranches(event); if 
(ana.gnn_ntuple) @@ -181,7 +183,7 @@ void createGnnNtupleBranches() { } //________________________________________________________________________________________________________________________________ -void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setOutputBranches(Event* event) { // ============ Sim tracks ============= int n_accepted_simtrk = 0; for (unsigned int isimtrk = 0; isimtrk < trk.sim_pt().size(); ++isimtrk) { @@ -224,7 +226,7 @@ void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { std::vector> tc_matched_simIdx; // ============ Track candidates ============= - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { // Compute reco quantities of track candidate based on final object @@ -276,7 +278,7 @@ void setOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setOptionalOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setOptionalOutputBranches(Event* event) { #ifdef CUT_VALUE_DEBUG setPixelQuintupletOutputBranches(event); @@ -287,12 +289,12 @@ void setOptionalOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) } //________________________________________________________________________________________________________________________________ -void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setPixelQuintupletOutputBranches(Event* event) { // ============ pT5 ============= - lst::PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); - 
lst::Segments const* segments = event->getSegments()->data(); - lst::Modules const* modules = event->getModules()->data(); + PixelQuintuplets const* pixelQuintuplets = event->getPixelQuintuplets()->data(); + Quintuplets const* quintuplets = event->getQuintuplets()->data(); + Segments const* segments = event->getSegments()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); unsigned int nPixelQuintuplets = @@ -303,7 +305,7 @@ void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* for (unsigned int pT5 = 0; pT5 < nPixelQuintuplets; pT5++) { unsigned int T5Index = getT5FrompT5(event, pT5); unsigned int pLSIndex = getPixelLSFrompT5(event, pT5); - float pt = (__H2F(quintuplets->innerRadius[T5Index]) * lst::k2Rinv1GeVf * 2 + segments->ptIn[pLSIndex]) / 2; + float pt = (__H2F(quintuplets->innerRadius[T5Index]) * k2Rinv1GeVf * 2 + segments->ptIn[pLSIndex]) / 2; float eta = segments->eta[pLSIndex]; float phi = segments->phi[pLSIndex]; @@ -363,10 +365,10 @@ void setPixelQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* } //________________________________________________________________________________________________________________________________ -void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); - lst::Modules const* modules = event->getModules()->data(); +void setQuintupletOutputBranches(Event* event) { + Quintuplets const* quintuplets = event->getQuintuplets()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); std::vector sim_t5_matched(n_accepted_simtrk); @@ -376,7 +378,7 @@ void 
setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event int nQuintuplets = quintuplets->nQuintuplets[lowerModuleIdx]; for (unsigned int idx = 0; idx < nQuintuplets; idx++) { unsigned int quintupletIndex = ranges->quintupletModuleIndices[lowerModuleIdx] + idx; - float pt = __H2F(quintuplets->innerRadius[quintupletIndex]) * lst::k2Rinv1GeVf * 2; + float pt = __H2F(quintuplets->innerRadius[quintupletIndex]) * k2Rinv1GeVf * 2; float eta = __H2F(quintuplets->eta[quintupletIndex]); float phi = __H2F(quintuplets->phi[quintupletIndex]); @@ -434,10 +436,10 @@ void setQuintupletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event } //________________________________________________________________________________________________________________________________ -void setPixelTripletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::Segments const* segments = event->getSegments()->data(); +void setPixelTripletOutputBranches(Event* event) { + PixelTriplets const* pixelTriplets = event->getPixelTriplets()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + Segments const* segments = event->getSegments()->data(); int n_accepted_simtrk = ana.tx->getBranch>("sim_TC_matched").size(); unsigned int nPixelTriplets = *pixelTriplets->nPixelTriplets; @@ -497,14 +499,14 @@ void setPixelTripletOutputBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* eve } //________________________________________________________________________________________________________________________________ -void setGnnNtupleBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void setGnnNtupleBranches(Event* event) { // Get relevant information - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits 
const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); std::set mds_used_in_sg; std::map md_index_map; @@ -638,10 +640,10 @@ void setGnnNtupleBranches(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int MD) { +void setGnnNtupleMiniDoublet(Event* event, unsigned int MD) { // Get relevant information - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); // Get the hit indices unsigned int hit0 = miniDoublets->anchorHitIndices[MD]; @@ -678,7 +680,7 @@ void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, un float dphichange = miniDoublets->dphichanges[MD]; // Computing pt - float pt = hit0_r * lst::k2Rinv1GeVf / sin(dphichange); + float pt = hit0_r * k2Rinv1GeVf / sin(dphichange); // T5 eta and phi are computed using outer and innermost hits lst_math::Hit hitA(trk.ph2_x()[anchitidx], trk.ph2_y()[anchitidx], trk.ph2_z()[anchitidx]); @@ -706,10 +708,9 @@ void setGnnNtupleMiniDoublet(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, un } 
//________________________________________________________________________________________________________________________________ -std::tuple> parseTrackCandidate( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { +std::tuple> parseTrackCandidate(Event* event, unsigned int idx) { // Get the type of the track candidate - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); short type = trackCandidates->trackCandidateType[idx]; enum { pT5 = 7, pT3 = 5, T5 = 4, pLS = 8 }; @@ -740,12 +741,12 @@ std::tuple> parseTrackCandidate( } //________________________________________________________________________________________________________________________________ -std::tuple, std::vector> parsepT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { +std::tuple, std::vector> parsepT5(Event* event, + unsigned int idx) { // Get relevant information - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Quintuplets const* quintuplets = event->getQuintuplets()->data(); + Segments const* segments = event->getSegments()->data(); // // pictorial representation of a pT5 @@ -841,7 +842,7 @@ std::tuple, std::vectorptIn[pLS]; const float eta_pLS = segments->eta[pLS]; const float phi_pLS = segments->phi[pLS]; - float pt_T5 = __H2F(quintuplets->innerRadius[T5Index]) * 2 * lst::k2Rinv1GeVf; + float pt_T5 = __H2F(quintuplets->innerRadius[T5Index]) * 2 * k2Rinv1GeVf; const float pt = (pt_T5 + pt_pLS) / 2; // Form the hit idx/type std::vector @@ -852,12 +853,12 @@ std::tuple, std::vector, std::vector> parsepT3( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { +std::tuple, 
std::vector> parsepT3(Event* event, + unsigned int idx) { // Get relevant information - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Triplets const* triplets = event->getTriplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Triplets const* triplets = event->getTriplets()->data(); + Segments const* segments = event->getSegments()->data(); // // pictorial representation of a pT3 @@ -874,7 +875,7 @@ std::tuple, std::vectorptIn[pLS]; const float eta_pLS = segments->eta[pLS]; const float phi_pLS = segments->phi[pLS]; - float pt_T3 = triplets->circleRadius[T3] * 2 * lst::k2Rinv1GeVf; + float pt_T3 = triplets->circleRadius[T3] * 2 * k2Rinv1GeVf; // average pt const float pt = (pt_pLS + pt_T3) / 2; @@ -887,10 +888,10 @@ std::tuple, std::vector, std::vector> parseT5( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Quintuplets const* quintuplets = event->getQuintuplets()->data(); +std::tuple, std::vector> parseT5(Event* event, + unsigned int idx) { + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Quintuplets const* quintuplets = event->getQuintuplets()->data(); unsigned int T5 = trackCandidates->directObjectIndices[idx]; std::vector hits = getHitsFromT5(event, T5); @@ -906,7 +907,7 @@ std::tuple, std::vectorinnerRadius[T5] * lst::k2Rinv1GeVf * 2; + const float pt = quintuplets->innerRadius[T5] * k2Rinv1GeVf * 2; // T5 eta and phi are computed using outer and innermost hits lst_math::Hit hitA(trk.ph2_x()[Hit_0], trk.ph2_y()[Hit_0], trk.ph2_z()[Hit_0]); @@ -921,10 +922,10 @@ std::tuple, std::vector, std::vector> parsepLS( - ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event, unsigned int idx) { - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - 
lst::Segments const* segments = event->getSegments()->data(); +std::tuple, std::vector> parsepLS(Event* event, + unsigned int idx) { + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Segments const* segments = event->getSegments()->data(); // Getting pLS index unsigned int pLS = trackCandidates->directObjectIndices[idx]; @@ -942,9 +943,9 @@ std::tuple, std::vectorgetModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printHitMultiplicities(Event* event) { + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); int nHits = 0; for (unsigned int idx = 0; idx <= *(modules->nLowerModules); @@ -957,9 +958,9 @@ void printHitMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printMiniDoubletMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Modules const* modules = event->getModules()->data(); +void printMiniDoubletMultiplicities(Event* event) { + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int nMiniDoublets = 0; int totOccupancyMiniDoublets = 0; @@ -976,7 +977,7 @@ void printMiniDoubletMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* ev } //________________________________________________________________________________________________________________________________ -void printAllObjects(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { +void printAllObjects(Event* event) { printMDs(event); printLSs(event); printpLSs(event); @@ -984,11 +985,11 @@ void printAllObjects(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } 
//________________________________________________________________________________________________________________________________ -void printMDs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printMDs(Event* event) { + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); // Then obtain the lower module index for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { @@ -1006,12 +1007,12 @@ void printMDs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printLSs(Event* event) { + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); int nSegments = 0; for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { @@ -1038,12 +1039,12 @@ void printLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } 
//________________________________________________________________________________________________________________________________ -void printpLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void printpLSs(Event* event) { + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); unsigned int i = *(modules->nLowerModules); unsigned int idx = i; //modules->lowerModuleIndices[i]; @@ -1068,12 +1069,12 @@ void printpLSs(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void printT3s(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::Triplets const* triplets = event->getTriplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Hits const* hitsEvt = event->getHits()->data(); - lst::Modules const* modules = event->getModules()->data(); +void printT3s(Event* event) { + Triplets const* triplets = event->getTriplets()->data(); + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + Hits const* hitsEvt = event->getHits()->data(); + ::lst::Modules const* modules = event->getModules()->data(); int nTriplets = 0; for (unsigned int i = 0; i < *(modules->nLowerModules); ++i) { // unsigned int idx = 
modules->lowerModuleIndices[i]; @@ -1110,13 +1111,13 @@ void printT3s(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { } //________________________________________________________________________________________________________________________________ -void debugPrintOutlierMultiplicities(ALPAKA_ACCELERATOR_NAMESPACE::lst::Event* event) { - lst::TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); - lst::Triplets const* triplets = event->getTriplets()->data(); - lst::Segments const* segments = event->getSegments()->data(); - lst::MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); - lst::Modules const* modules = event->getModules()->data(); - lst::ObjectRanges const* ranges = event->getRanges()->data(); +void debugPrintOutlierMultiplicities(Event* event) { + TrackCandidates const* trackCandidates = event->getTrackCandidates()->data(); + Triplets const* triplets = event->getTriplets()->data(); + Segments const* segments = event->getSegments()->data(); + MiniDoublets const* miniDoublets = event->getMiniDoublets()->data(); + ::lst::Modules const* modules = event->getModules()->data(); + ObjectRanges const* ranges = event->getRanges()->data(); //int nTrackCandidates = 0; for (unsigned int idx = 0; idx <= *(modules->nLowerModules); ++idx) { if (trackCandidates->nTrackCandidates[idx] > 50000) {