Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flang][cuda] Data transfer with descriptor #114598

Merged
merged 2 commits into from
Nov 1, 2024

Conversation

clementval
Copy link
Contributor

@clementval clementval commented Nov 1, 2024

Reopen PR #114302 as it was automatically closed.

Review in #114302

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category labels Nov 1, 2024
@llvmbot
Copy link
Member

llvmbot commented Nov 1, 2024

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Reopen PR #114302 as it was automatically closed.

Review in #114302


Full diff: https://github.com/llvm/llvm-project/pull/114598.diff

2 Files Affected:

  • (modified) flang/runtime/CUDA/memory.cpp (+32-2)
  • (modified) flang/unittests/Runtime/CUDA/Memory.cpp (+40)
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index d03f1cc0e48d66..daf1db684a3d2e 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
 #include "flang/Runtime/CUDA/memory.h"
 #include "../terminator.h"
 #include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
 
 #include "cuda_runtime.h"
 
 namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+  return dst;
+}
+
+static void *MemmoveDeviceToHost(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+  return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+    void *dst, const void *src, std::size_t count) {
+  // TODO: Use cudaMemcpyAsync when we have support for stream.
+  CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice));
+  return dst;
+}
+
 extern "C" {
 
 void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
 void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
     unsigned mode, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
-  terminator.Crash(
-      "not yet implemented: CUDA data transfer between two descriptors");
+  MemmoveFct memmoveFct;
+  if (mode == kHostToDevice) {
+    memmoveFct = &MemmoveHostToDevice;
+  } else if (mode == kDeviceToHost) {
+    memmoveFct = &MemmoveDeviceToHost;
+  } else if (mode == kDeviceToDevice) {
+    memmoveFct = &MemmoveDeviceToDevice;
+  }
+  Fortran::runtime::Assign(
+      *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
 }
 }
 } // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..3492b72aac0919 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
 #include "flang/Runtime/CUDA/memory.h"
 #include "gtest/gtest.h"
 #include "../../../runtime/terminator.h"
+#include "../tools.h"
 #include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
 #include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
 
 #include "cuda_runtime.h"
 
+using namespace Fortran::runtime;
 using namespace Fortran::runtime::cuda;
 
 TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
   EXPECT_EQ(42, host);
   RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
 }
+
+static OwningPtr<Descriptor> createAllocatable(
+    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+  return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+      CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+  using Fortran::common::TypeCategory;
+  RTNAME(CUFRegisterAllocator)();
+  // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+  auto dev{createAllocatable(TypeCategory::Integer, 4)};
+  dev->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+  RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(dev->IsAllocated());
+
+  // Create temp array to transfer to device.
+  auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+  RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+
+  // Retrieve data from device.
+  auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+  RTNAME(CUFDataTransferDescDesc)
+  (host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
+
+  for (unsigned i = 0; i < 10; ++i) {
+    EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+  }
+}

@clementval clementval merged commit 3247386 into llvm:main Nov 1, 2024
2 of 5 checks passed
Copy link

github-actions bot commented Nov 1, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 88a0a318e80565fef9367728b878641d261acfb6 0ec816899e28aa901b850da5618619de89eaf289 --extensions cpp -- flang/runtime/CUDA/memory.cpp flang/unittests/Runtime/CUDA/Memory.cpp
View the diff from clang-format here.
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 3492b72aac..7c8b7aa5a4 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -57,7 +57,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
   // Create temp array to transfer to device.
   auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
       std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
-  RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+  RTNAME(CUFDataTransferDescDesc)
+  (dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
 
   // Retrieve data from device.
   auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},

smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
Reopen PR llvm#114302 as it was automatically closed. 

Review in llvm#114302
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
Reopen PR llvm#114302 as it was automatically closed. 

Review in llvm#114302
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Reopen PR llvm#114302 as it was automatically closed. 

Review in llvm#114302
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:runtime flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants