Skip to content

Commit

Permalink
Update to clang format 17 (cms-patatrack#34)
Browse files Browse the repository at this point in the history
* Switch to `fedora-latest`

* Update to `clang-format-17`

* Go back to ubuntu

* Fix `clang-format` file

* Formatting
  • Loading branch information
sbaldu authored Mar 19, 2024
1 parent ad43e2b commit 0a2df86
Show file tree
Hide file tree
Showing 28 changed files with 856 additions and 559 deletions.
6 changes: 4 additions & 2 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
Language: Cpp
BasedOnStyle: Google
ColumnLimit: 108
ColumnLimit: 90
NamespaceIndentation: All
SortIncludes: false
IndentWidth: 2
Expand All @@ -11,8 +11,10 @@ PenaltyExcessCharacter: 100
AlignAfterOpenBracket: Align
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
BinPackParameters: false
AlwaysBreakTemplateDeclarations: Yes
ReflowComments: false
BinPackArguments: false
BinPackParameters: false
DerivePointerAlignment: false
PointerAlignment: Left
ReferenceAlignment: Left
2 changes: 1 addition & 1 deletion .github/workflows/clang_format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ jobs:
- name: Run clang-format style check
uses: jidicula/clang-format-action@v4.11.0
with:
clang-format-version: '16'
clang-format-version: '17'
check-path: ${{ matrix.path }}
exclude-regex: 'CLUEstering/include/test/doctest.h'
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/AllocatorPolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace cms::alpakatools {
template <typename TDev>
constexpr inline AllocatorPolicy allocator_policy = AllocatorPolicy::Synchronous;

#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || \
defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
template <>
constexpr inline AllocatorPolicy allocator_policy<alpaka::DevCpu> =
#if !defined ALPAKA_DISABLE_CACHING_ALLOCATOR
Expand Down
22 changes: 17 additions & 5 deletions CLUEstering/alpaka/AlpakaCore/CachedBufAlloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ namespace cms::alpakatools {

//! The caching memory allocator implementation for the pinned host memory
template <typename TElem, typename TDim, typename TIdx>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
struct CachedBufAlloc<TElem,
TDim,
TIdx,
alpaka::DevCpu,
alpaka::QueueCudaRtNonBlocking,
void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
alpaka::QueueCudaRtNonBlocking queue,
Expand Down Expand Up @@ -96,7 +101,12 @@ namespace cms::alpakatools {

//! The caching memory allocator implementation for the pinned host memory
template <typename TElem, typename TDim, typename TIdx>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
struct CachedBufAlloc<TElem,
TDim,
TIdx,
alpaka::DevCpu,
alpaka::QueueHipRtNonBlocking,
void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
alpaka::QueueHipRtNonBlocking queue,
Expand Down Expand Up @@ -152,9 +162,11 @@ namespace cms::alpakatools {
} // namespace traits

template <typename TElem, typename TIdx, typename TExtent, typename TQueue, typename TDev>
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(
dev, queue, extent);
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev,
TQueue queue,
TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::
allocCachedBuf(dev, queue, extent);
}

} // namespace cms::alpakatools
Expand Down
105 changes: 62 additions & 43 deletions CLUEstering/alpaka/AlpakaCore/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,16 @@ namespace cms::alpakatools {
template <typename TDevice, typename TQueue>
class CachingAllocator {
public:
using Device = TDevice; // the "memory device", where the memory will be allocated
using Queue = TQueue; // the queue used to submit the memory operations
using Device = TDevice; // the "memory device", where the memory will be allocated
using Queue = TQueue; // the queue used to submit the memory operations
using Event = alpaka::Event<Queue>; // the events used to synchronise the operations
using Buffer = alpaka::Buf<Device, std::byte, alpaka::DimInt<1u>, size_t>;

// The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" "
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or
std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the "
"\"synchronisation device\" "
"type, or be the "
"host CPU.");

Expand All @@ -112,8 +114,8 @@ namespace cms::alpakatools {
size_t maxCachedBytes, // total storage for the allocator (0 means no limit);
double
maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit);
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
// this is safe only if all memory operations are scheduled in the same queue
bool debug)
Expand All @@ -135,7 +137,8 @@ namespace cms::alpakatools {
<< " resulting bins:\n";
for (auto bin = minBin_; bin <= maxBin_; ++bin) {
auto binSize = detail::power(binGrowth, bin);
out << " " << std::right << std::setw(12) << detail::as_bytes(binSize) << '\n';
out << " " << std::right << std::setw(12) << detail::as_bytes(binSize)
<< '\n';
}
out << " maximum amount of cached memory: " << detail::as_bytes(maxCachedBytes_);
std::cout << out.str() << std::endl;
Expand Down Expand Up @@ -201,22 +204,26 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned " << block.bytes
<< " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
<< " , event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned "
<< block.bytes << " bytes at " << ptr << " from associated queue "
<< block.queue->m_spQueueImpl.get() << " , event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), "
<< liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
} else {
// if the buffer is not recached, it is automatically freed when block goes out of scope
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes
<< " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed "
<< block.bytes << " bytes at " << ptr << " from associated queue "
<< block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), "
<< liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
}
Expand Down Expand Up @@ -257,10 +264,11 @@ namespace cms::alpakatools {
return std::make_tuple(minBin_, minBinBytes_);
}
if (bytes > maxBinBytes_) {
throw std::runtime_error("Requested allocation size " + std::to_string(bytes) +
" bytes is too large for the caching detail with maximum bin " +
std::to_string(maxBinBytes_) +
" bytes. You might want to increase the maximum bin size");
throw std::runtime_error(
"Requested allocation size " + std::to_string(bytes) +
" bytes is too large for the caching detail with maximum bin " +
std::to_string(maxBinBytes_) +
" bytes. You might want to increase the maximum bin size");
}
unsigned int bin = minBin_;
size_t binBytes = minBinBytes_;
Expand Down Expand Up @@ -301,11 +309,13 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " reused cached block at "
<< block.buffer->data() << " (" << block.bytes << " bytes) for queue "
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get()
<< " (previously associated with stream " << iBlock->second.queue->m_spQueueImpl.get()
<< " , event " << iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_)
<< " reused cached block at " << block.buffer->data() << " ("
<< block.bytes << " bytes) for queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get()
<< " (previously associated with stream "
<< iBlock->second.queue->m_spQueueImpl.get() << " , event "
<< iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
std::cout << out.str() << std::endl;
}

Expand All @@ -324,11 +334,14 @@ namespace cms::alpakatools {
return alpaka::allocBuf<std::byte, size_t>(device_, bytes);
} else if constexpr (std::is_same_v<Device, alpaka::DevCpu>) {
// allocate pinned host memory accessible by the queue's platform
return alpaka::allocMappedBuf<alpaka::Pltf<alpaka::Dev<Queue>>, std::byte, size_t>(device_, bytes);
return alpaka::allocMappedBuf<alpaka::Pltf<alpaka::Dev<Queue>>, std::byte, size_t>(
device_, bytes);
} else {
// unsupported combination
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" "
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or
std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the "
"\"synchronisation device\" "
"type, or be "
"the host CPU.");
}
Expand All @@ -341,8 +354,9 @@ namespace cms::alpakatools {
// the allocation attempt failed: free all cached blocks on the device and retry
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " failed to allocate "
<< block.bytes << " bytes for queue " << block.queue->m_spQueueImpl.get()
out << "\t" << deviceType_ << " " << alpaka::getName(device_)
<< " failed to allocate " << block.bytes << " bytes for queue "
<< block.queue->m_spQueueImpl.get()
<< ", retrying after freeing cached allocations" << std::endl;
std::cout << out.str() << std::endl;
}
Expand All @@ -366,10 +380,10 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " allocated new block at "
<< block.buffer->data() << " (" << block.bytes << " bytes associated with queue "
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << "."
<< std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_)
<< " allocated new block at " << block.buffer->data() << " (" << block.bytes
<< " bytes associated with queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get() << "." << std::endl;
std::cout << out.str() << std::endl;
}
}
Expand All @@ -383,10 +397,11 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << iBlock->second.bytes
<< " bytes.\n\t\t " << (cachedBlocks_.size() - 1) << " available blocks cached ("
<< cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks ("
<< cachedBytes_.live << " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed "
<< iBlock->second.bytes << " bytes.\n\t\t " << (cachedBlocks_.size() - 1)
<< " available blocks cached (" << cachedBytes_.free << " bytes), "
<< liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}

Expand All @@ -395,18 +410,22 @@ namespace cms::alpakatools {
}

// TODO replace with a tbb::concurrent_multimap ?
using CachedBlocks = std::multimap<unsigned int, BlockDescriptor>; // ordered by the allocation bin
using CachedBlocks =
std::multimap<unsigned int, BlockDescriptor>; // ordered by the allocation bin
// TODO replace with a tbb::concurrent_map ?
using BusyBlocks = std::map<void*, BlockDescriptor>; // ordered by the address of the allocated memory
using BusyBlocks =
std::map<void*, BlockDescriptor>; // ordered by the address of the allocated memory

inline static const std::string deviceType_ = boost::core::demangle(typeid(Device).name());
inline static const std::string deviceType_ =
boost::core::demangle(typeid(Device).name());

mutable std::mutex mutex_;
Device device_; // the device where the memory is allocated

CachedBytes cachedBytes_;
CachedBlocks cachedBlocks_; // Set of cached device allocations available for reuse
BusyBlocks liveBlocks_; // map of pointers to the live device allocations currently in use
BusyBlocks
liveBlocks_; // map of pointers to the live device allocations currently in use

const unsigned int binGrowth_; // Geometric growth factor for bin-sizes
const unsigned int minBin_;
Expand Down
24 changes: 17 additions & 7 deletions CLUEstering/alpaka/AlpakaCore/HostOnlyTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,22 @@ namespace alpaka {
struct Enqueue<QueueCudaRtNonBlocking, HostOnlyTask> {
using TApi = ApiCudaRt;

static void CUDART_CB callback(cudaStream_t /*queue*/, cudaError_t /*status*/, void* arg) {
static void CUDART_CB callback(cudaStream_t /*queue*/,
cudaError_t /*status*/,
void* arg) {
//ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(status);
std::unique_ptr<HostOnlyTask> pTask(static_cast<HostOnlyTask*>(arg));
(*pTask)();
}

ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task) -> void {
ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task)
-> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaStreamAddCallback(
alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
cudaStreamAddCallback(alpaka::getNativeHandle(queue),
callback,
static_cast<void*>(pTask.release()),
0u));
}
};
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
Expand All @@ -52,10 +58,14 @@ namespace alpaka {
(*pTask)();
}

ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task) -> void {
ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task)
-> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamAddCallback(
alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
hipStreamAddCallback(alpaka::getNativeHandle(queue),
callback,
static_cast<void*>(pTask.release()),
0u));
}
};
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
Expand Down
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/alpakaConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ namespace alpaka_common {

// trick to force expanding ALPAKA_ACCELERATOR_NAMESPACE before stringification inside DEFINE_FWK_MODULE
#define DEFINE_FWK_ALPAKA_MODULE2(name) DEFINE_FWK_MODULE(name)
#define DEFINE_FWK_ALPAKA_MODULE(name) DEFINE_FWK_ALPAKA_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::name)
#define DEFINE_FWK_ALPAKA_MODULE(name) \
DEFINE_FWK_ALPAKA_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::name)

#define DEFINE_FWK_ALPAKA_EVENTSETUP_MODULE2(name) DEFINE_FWK_EVENTSETUP_MODULE(name)
#define DEFINE_FWK_ALPAKA_EVENTSETUP_MODULE(name) \
Expand Down
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/alpakaDevices.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
namespace cms::alpakatools {

// alpaka host device
inline const alpaka_common::DevHost host = alpaka::getDevByIdx<alpaka_common::PltfHost>(0u);
inline const alpaka_common::DevHost host =
alpaka::getDevByIdx<alpaka_common::PltfHost>(0u);

// alpaka accelerator devices
template <typename TPlatform>
Expand Down
12 changes: 8 additions & 4 deletions CLUEstering/alpaka/AlpakaCore/alpakaFwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,14 @@ namespace alpaka {
template <typename TApi, bool TBlocking>
class QueueUniformCudaHipRt;
}
using QueueCudaRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, true>;
using QueueCudaRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, false>;
using QueueHipRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, true>;
using QueueHipRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, false>;
using QueueCudaRtBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, true>;
using QueueCudaRtNonBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, false>;
using QueueHipRtBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, true>;
using QueueHipRtNonBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, false>;

// Events
template <typename TDev>
Expand Down
Loading

0 comments on commit 0a2df86

Please sign in to comment.