-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][cuda] Data transfer with descriptor #114598
Merged
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
llvmbot
added
flang:runtime
flang
Flang issues not falling into any other category
labels
Nov 1, 2024
@llvm/pr-subscribers-flang-runtime Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesReopen PR #114302 as it was automatically closed. Review in #114302 Full diff: https://github.com/llvm/llvm-project/pull/114598.diff 2 Files Affected:
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index d03f1cc0e48d66..daf1db684a3d2e 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+ return dst;
+}
+
+static void *MemmoveDeviceToHost(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+ return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice));
+ return dst;
+}
+
extern "C" {
void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
- terminator.Crash(
- "not yet implemented: CUDA data transfer between two descriptors");
+ MemmoveFct memmoveFct;
+ if (mode == kHostToDevice) {
+ memmoveFct = &MemmoveHostToDevice;
+ } else if (mode == kDeviceToHost) {
+ memmoveFct = &MemmoveDeviceToHost;
+ } else if (mode == kDeviceToDevice) {
+ memmoveFct = &MemmoveDeviceToDevice;
+ }
+ Fortran::runtime::Assign(
+ *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
}
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..3492b72aac0919 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
#include "flang/Runtime/CUDA/memory.h"
#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
+#include "../tools.h"
#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda_runtime.h"
+using namespace Fortran::runtime;
using namespace Fortran::runtime::cuda;
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
EXPECT_EQ(42, host);
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
}
+
+static OwningPtr<Descriptor> createAllocatable(
+ Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+ return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+ CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+ using Fortran::common::TypeCategory;
+ RTNAME(CUFRegisterAllocator)();
+ // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+ auto dev{createAllocatable(TypeCategory::Integer, 4)};
+ dev->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+ RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(dev->IsAllocated());
+
+ // Create temp array to transfer to device.
+ auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+ RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+
+ // Retrieve data from device.
+ auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+ RTNAME(CUFDataTransferDescDesc)
+ (host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
+
+ for (unsigned i = 0; i < 10; ++i) {
+ EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+ }
+}
|
You can test this locally with the following command:git-clang-format --diff 88a0a318e80565fef9367728b878641d261acfb6 0ec816899e28aa901b850da5618619de89eaf289 --extensions cpp -- flang/runtime/CUDA/memory.cpp flang/unittests/Runtime/CUDA/Memory.cpp View the diff from clang-format here.diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 3492b72aac..7c8b7aa5a4 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -57,7 +57,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
// Create temp array to transfer to device.
auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
- RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+ RTNAME(CUFDataTransferDescDesc)
+ (dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
// Retrieve data from device.
auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
|
smallp-o-p
pushed a commit
to smallp-o-p/llvm-project
that referenced
this pull request
Nov 3, 2024
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
smallp-o-p
pushed a commit
to smallp-o-p/llvm-project
that referenced
this pull request
Nov 3, 2024
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
NoumanAmir657
pushed a commit
to NoumanAmir657/llvm-project
that referenced
this pull request
Nov 4, 2024
Reopen PR llvm#114302 as it was automatically closed. Review in llvm#114302
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reopen PR #114302 as it was automatically closed.
Review in #114302