[flang][cuda] Data transfer with descriptor #114598

clementval · 2024-11-01T19:35:12Z

Reopen PR #114302 as it was automatically closed.

Review in #114302

llvmbot · 2024-11-01T19:35:47Z

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Reopen PR #114302 as it was automatically closed.

Review in #114302

Full diff: https://github.com/llvm/llvm-project/pull/114598.diff

2 Files Affected:

(modified) flang/runtime/CUDA/memory.cpp (+32-2)
(modified) flang/unittests/Runtime/CUDA/Memory.cpp (+40)

diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index d03f1cc0e48d66..daf1db684a3d2e 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
 #include "flang/Runtime/CUDA/memory.h"
 #include "../terminator.h"
 #include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
 
 #include "cuda_runtime.h"
 
 namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+  return dst;
+}
+
+static void *MemmoveDeviceToHost(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+  return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice));
+  return dst;
+}
+
 extern "C" {
 
 void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
 void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
     unsigned mode, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
-  terminator.Crash(
-      "not yet implemented: CUDA data transfer between two descriptors");
+  MemmoveFct memmoveFct;
+  if (mode == kHostToDevice) {
+    memmoveFct = &MemmoveHostToDevice;
+  } else if (mode == kDeviceToHost) {
+    memmoveFct = &MemmoveDeviceToHost;
+  } else if (mode == kDeviceToDevice) {
+    memmoveFct = &MemmoveDeviceToDevice;
+  }
+  Fortran::runtime::Assign(
+      *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
 }
 }
 } // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..3492b72aac0919 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
 #include "flang/Runtime/CUDA/memory.h"
 #include "gtest/gtest.h"
 #include "../../../runtime/terminator.h"
+#include "../tools.h"
 #include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
 #include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda_runtime.h"
 
+using namespace Fortran::runtime;
 using namespace Fortran::runtime::cuda;
 
 TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
   EXPECT_EQ(42, host);
   RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
 }
+
+static OwningPtr<Descriptor> createAllocatable(
+    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+  return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+      CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+  using Fortran::common::TypeCategory;
+  RTNAME(CUFRegisterAllocator)();
+  // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+  auto dev{createAllocatable(TypeCategory::Integer, 4)};
+  dev->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+  RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(dev->IsAllocated());
+
+  // Create temp array to transfer to device.
+  auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+  RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+
+  // Retrieve data from device.
+  auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+  RTNAME(CUFDataTransferDescDesc)
+  (host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
+
+  for (unsigned i = 0; i < 10; ++i) {
+    EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+  }
+}

github-actions · 2024-11-01T19:39:14Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff 88a0a318e80565fef9367728b878641d261acfb6 0ec816899e28aa901b850da5618619de89eaf289 --extensions cpp -- flang/runtime/CUDA/memory.cpp flang/unittests/Runtime/CUDA/Memory.cpp

View the diff from clang-format here.

diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 3492b72aac..7c8b7aa5a4 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -57,7 +57,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
   // Create temp array to transfer to device.
   auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
       std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
-  RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+  RTNAME(CUFDataTransferDescDesc)
+  (dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
 
   // Retrieve data from device.
   auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},

Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302

[flang][cuda] Data transfer with descriptor

677ef06

llvmbot added flang:runtime flang Flang issues not falling into any other category labels Nov 1, 2024

clang-format

0ec8168

clementval merged commit 3247386 into llvm:main Nov 1, 2024
2 of 5 checks passed

smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024

[flang][cuda] Data transfer with descriptor (llvm#114598)

a103169

Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302

smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024

[flang][cuda] Data transfer with descriptor (llvm#114598)

4da1f61

Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302

NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024

[flang][cuda] Data transfer with descriptor (llvm#114598)

58666fd

Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[flang][cuda] Data transfer with descriptor #114598

[flang][cuda] Data transfer with descriptor #114598

clementval commented Nov 1, 2024 •

edited

Loading

llvmbot commented Nov 1, 2024

github-actions bot commented Nov 1, 2024

[flang][cuda] Data transfer with descriptor #114598

[flang][cuda] Data transfer with descriptor #114598

Conversation

clementval commented Nov 1, 2024 • edited Loading

llvmbot commented Nov 1, 2024

github-actions bot commented Nov 1, 2024

clementval commented Nov 1, 2024 •

edited

Loading