Skip to content

Commit

Permalink
[flang][cuda] Data transfer with descriptor (#114598)
Browse files Browse the repository at this point in the history
Reopen PR #114302 as it was automatically closed. 

Review in #114302
  • Loading branch information
clementval authored Nov 1, 2024
1 parent 1a18767 commit 3247386
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 2 deletions.
34 changes: 32 additions & 2 deletions flang/runtime/CUDA/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,32 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/assign.h"

#include "cuda_runtime.h"

namespace Fortran::runtime::cuda {
static void *MemmoveHostToDevice(
void *dst, const void *src, std::size_t count) {
// TODO: Use cudaMemcpyAsync when we have support for stream.
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
return dst;
}

static void *MemmoveDeviceToHost(
void *dst, const void *src, std::size_t count) {
// TODO: Use cudaMemcpyAsync when we have support for stream.
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
return dst;
}

static void *MemmoveDeviceToDevice(
void *dst, const void *src, std::size_t count) {
// TODO: Use cudaMemcpyAsync when we have support for stream.
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice));
return dst;
}

extern "C" {

void *RTDEF(CUFMemAlloc)(
Expand Down Expand Up @@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
terminator.Crash(
"not yet implemented: CUDA data transfer between two descriptors");
MemmoveFct memmoveFct;
if (mode == kHostToDevice) {
memmoveFct = &MemmoveHostToDevice;
} else if (mode == kDeviceToHost) {
memmoveFct = &MemmoveDeviceToHost;
} else if (mode == kDeviceToDevice) {
memmoveFct = &MemmoveDeviceToDevice;
}
Fortran::runtime::Assign(
*dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
}
}
} // namespace Fortran::runtime::cuda
40 changes: 40 additions & 0 deletions flang/unittests/Runtime/CUDA/Memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,17 @@
#include "flang/Runtime/CUDA/memory.h"
#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
#include "../tools.h"
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
#include "flang/Runtime/allocatable.h"
#include "flang/Runtime/allocator-registry.h"

#include "cuda_runtime.h"

using namespace Fortran::runtime;
using namespace Fortran::runtime::cuda;

TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
Expand All @@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
EXPECT_EQ(42, host);
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
}

static OwningPtr<Descriptor> createAllocatable(
Fortran::common::TypeCategory tc, int kind, int rank = 1) {
return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
CFI_attribute_allocatable);
}

TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
using Fortran::common::TypeCategory;
RTNAME(CUFRegisterAllocator)();
// INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
auto dev{createAllocatable(TypeCategory::Integer, 4)};
dev->SetAllocIdx(kDeviceAllocatorPos);
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
EXPECT_TRUE(dev->IsAllocated());

// Create temp array to transfer to device.
auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);

// Retrieve data from device.
auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
RTNAME(CUFDataTransferDescDesc)
(host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);

for (unsigned i = 0; i < 10; ++i) {
EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
}
}

0 comments on commit 3247386

Please sign in to comment.