From 89ba16b84a2142da229bc9e887e62c8604d5d743 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 6 Aug 2024 15:04:53 -0700 Subject: [PATCH 1/4] [flang][cuda] Force default allocator in device code --- flang/runtime/descriptor.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index 34f7a02ea8c7b..6d43bacaed697 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -162,11 +162,17 @@ RT_API_ATTRS int Descriptor::Allocate() { elementBytes = raw_.elem_len = 0; } std::size_t byteSize{Elements() * elementBytes}; + + // Force default allocator in device code. +#ifdef RT_DEVICE_COMPILATION + AllocFct alloc{allocatorRegistry.GetAllocator(kDefaultAllocator)}; +#else + AllocFct alloc{allocatorRegistry.GetAllocator(GetAllocIdx())}; +#endif + // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. - - AllocFct alloc{allocatorRegistry.GetAllocator(GetAllocIdx())}; void *p{alloc(byteSize ? byteSize : 1)}; if (!p) { return CFI_ERROR_MEM_ALLOCATION; @@ -209,7 +215,12 @@ RT_API_ATTRS int Descriptor::Deallocate() { if (!descriptor.base_addr) { return CFI_ERROR_BASE_ADDR_NULL; } else { + // Force default deallocator in device code. 
+#ifdef RT_DEVICE_COMPILATION + FreeFct free{allocatorRegistry.GetDeallocator(kDefaultAllocator)}; +#else FreeFct free{allocatorRegistry.GetDeallocator(GetAllocIdx())}; +#endif free(descriptor.base_addr); descriptor.base_addr = nullptr; return CFI_SUCCESS; From c5faa72b42500a97b23c10f8af2088631adde59d Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 6 Aug 2024 17:01:04 -0700 Subject: [PATCH 2/4] Put logic in a function --- flang/include/flang/Runtime/descriptor.h | 8 ++++++++ flang/runtime/descriptor.cpp | 16 ++-------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index 043f6931afad9..8f08dfd857e66 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -436,6 +436,14 @@ class Descriptor { RT_API_ATTRS inline int GetAllocIdx() const { return (raw_.extra & _CFI_ALLOCATOR_IDX_MASK) >> _CFI_ALLOCATOR_IDX_SHIFT; } + RT_API_ATTRS inline int GetNormalizedAllocIdx() const { +#ifdef RT_DEVICE_COMPILATION + // Force default allocator in device code. + return kDefaultAllocator; +#else + return GetAllocIdx(); +#endif + } RT_API_ATTRS inline void SetAllocIdx(int pos) { raw_.extra &= ~_CFI_ALLOCATOR_IDX_MASK; // Clear the allocator index bits. raw_.extra |= (pos << _CFI_ALLOCATOR_IDX_SHIFT); diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index 6d43bacaed697..74a3f069700be 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -162,14 +162,7 @@ RT_API_ATTRS int Descriptor::Allocate() { elementBytes = raw_.elem_len = 0; } std::size_t byteSize{Elements() * elementBytes}; - - // Force default allocator in device code. 
-#ifdef RT_DEVICE_COMPILATION - AllocFct alloc{allocatorRegistry.GetAllocator(kDefaultAllocator)}; -#else - AllocFct alloc{allocatorRegistry.GetAllocator(GetAllocIdx())}; -#endif - + AllocFct alloc{allocatorRegistry.GetAllocator(GetNormalizedAllocIdx())}; // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. @@ -215,12 +208,7 @@ RT_API_ATTRS int Descriptor::Deallocate() { if (!descriptor.base_addr) { return CFI_ERROR_BASE_ADDR_NULL; } else { - // Force default deallocator in device code. -#ifdef RT_DEVICE_COMPILATION - FreeFct free{allocatorRegistry.GetDeallocator(kDefaultAllocator)}; -#else - FreeFct free{allocatorRegistry.GetDeallocator(GetAllocIdx())}; -#endif + FreeFct free{allocatorRegistry.GetDeallocator(GetNormalizedAllocIdx())}; free(descriptor.base_addr); descriptor.base_addr = nullptr; return CFI_SUCCESS; From cfa14bf5dca708ef56bfd08b376218e7b7a43a88 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 8 Aug 2024 10:07:18 -0700 Subject: [PATCH 3/4] Add MapAllocIdx --- flang/include/flang/Runtime/descriptor.h | 8 -------- flang/runtime/descriptor.cpp | 13 +++++++++++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index 8f08dfd857e66..043f6931afad9 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -436,14 +436,6 @@ class Descriptor { RT_API_ATTRS inline int GetAllocIdx() const { return (raw_.extra & _CFI_ALLOCATOR_IDX_MASK) >> _CFI_ALLOCATOR_IDX_SHIFT; } - RT_API_ATTRS inline int GetNormalizedAllocIdx() const { -#ifdef RT_DEVICE_COMPILATION - // Force default allocator in device code. 
- return kDefaultAllocator; -#else - return GetAllocIdx(); -#endif - } RT_API_ATTRS inline void SetAllocIdx(int pos) { raw_.extra &= ~_CFI_ALLOCATOR_IDX_MASK; // Clear the allocator index bits. raw_.extra |= (pos << _CFI_ALLOCATOR_IDX_SHIFT); diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index 74a3f069700be..10919913ebaa8 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -154,6 +154,15 @@ RT_API_ATTRS std::size_t Descriptor::Elements() const { return elements; } +RT_API_ATTRS static int MapAllocIdx(const Descriptor &desc) { +#ifdef RT_DEVICE_COMPILATION + // Force default allocator in device code. + return kDefaultAllocator; +#else + return desc.GetAllocIdx(); +#endif +} + RT_API_ATTRS int Descriptor::Allocate() { std::size_t elementBytes{ElementBytes()}; if (static_cast<std::int64_t>(elementBytes) < 0) { @@ -162,7 +171,7 @@ RT_API_ATTRS int Descriptor::Allocate() { elementBytes = raw_.elem_len = 0; } std::size_t byteSize{Elements() * elementBytes}; - AllocFct alloc{allocatorRegistry.GetAllocator(GetNormalizedAllocIdx())}; + AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx(*this))}; // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. 
@@ -208,7 +217,7 @@ RT_API_ATTRS int Descriptor::Deallocate() { if (!descriptor.base_addr) { return CFI_ERROR_BASE_ADDR_NULL; } else { - FreeFct free{allocatorRegistry.GetDeallocator(GetNormalizedAllocIdx())}; + FreeFct free{allocatorRegistry.GetDeallocator(MapAllocIdx(*this))}; free(descriptor.base_addr); descriptor.base_addr = nullptr; return CFI_SUCCESS; From 991c437c62e6c671b2e87cb7fa3eb9f4214df160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 9 Aug 2024 08:37:49 -0700 Subject: [PATCH 4/4] Update descriptor.cpp --- flang/runtime/descriptor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index 10919913ebaa8..32f43e89dc7a3 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -154,7 +154,7 @@ RT_API_ATTRS std::size_t Descriptor::Elements() const { return elements; } -RT_API_ATTRS static int MapAllocIdx(const Descriptor &desc) { +RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) { #ifdef RT_DEVICE_COMPILATION // Force default allocator in device code. return kDefaultAllocator;