Skip to content

Commit

Permalink
[CustomDevice] add stream safe allocator support (PaddlePaddle#55393)
Browse files Browse the repository at this point in the history
  • Loading branch information
ronny1996 authored and cqulilujia committed Jul 24, 2023
1 parent ecea9d4 commit 7022e06
Show file tree
Hide file tree
Showing 6 changed files with 460 additions and 6 deletions.
3 changes: 2 additions & 1 deletion paddle/fluid/memory/allocation/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ if(UNIX AND NOT APPLE)
endif()

if(WITH_CUSTOM_DEVICE)
list(APPEND ALLOCATOR_SRCS custom_allocator.cc)
list(APPEND ALLOCATOR_SRCS custom_allocator.cc
stream_safe_custom_device_allocator.cc)
endif()

if(WITH_XPU)
Expand Down
127 changes: 123 additions & 4 deletions paddle/fluid/memory/allocation/allocator_facade.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/fluid/memory/allocation/custom_allocator.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h"
#endif

#include "paddle/fluid/platform/flags.h"

PADDLE_DEFINE_EXPORTED_int64(
Expand Down Expand Up @@ -174,6 +174,11 @@ class AllocatorFacadePrivate {
std::map<platform::XPUPlace,
std::map<XPUStream, std::shared_ptr<Allocator>>>;
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
using CustomDeviceAllocatorMap =
std::map<platform::CustomPlace,
std::map<phi::stream::stream_t, std::shared_ptr<Allocator>>>;
#endif

explicit AllocatorFacadePrivate(bool allow_free_idle_chunk = true) {
strategy_ = GetAllocatorStrategy();
Expand Down Expand Up @@ -564,6 +569,46 @@ class AllocatorFacadePrivate {
}
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
bool HasCustomDevice(const platform::CustomPlace& place,
                     phi::stream::stream_t stream) {
  // True only when a per-stream allocator has already been registered for
  // this exact (place, stream) pair. Caller must hold
  // custom_device_allocator_mutex_ (shared or exclusive).
  auto place_it = custom_device_allocators_.find(place);
  if (place_it == custom_device_allocators_.end()) {
    return false;
  }
  const auto& stream_map = place_it->second;
  return stream_map.count(stream) > 0;
}

// Returns the stream-safe allocator registered for (place, stream).
// When no allocator exists yet: throws NotFound if `create_if_not_found`
// is false, otherwise lazily creates one under an exclusive lock.
// Fast path takes only a shared (reader) lock.
const std::shared_ptr<Allocator>& GetAllocator(
    const platform::CustomPlace& place,
    phi::stream::stream_t stream,
    bool create_if_not_found = false) {
  /* shared_lock_guard */ {
    std::shared_lock<std::shared_timed_mutex> lock_guard(
        custom_device_allocator_mutex_);
    if (LIKELY(HasCustomDevice(place, stream))) {
      return custom_device_allocators_[place][stream];
    } else {
      PADDLE_ENFORCE_NE(create_if_not_found,
                        false,
                        platform::errors::NotFound(
                            "No allocator found for stream %s in place %s "
                            "with create_if_not_found = false",
                            stream,
                            place));
    }
  }

  /* unique_lock_guard */ {
    std::unique_lock<std::shared_timed_mutex> lock_guard(
        custom_device_allocator_mutex_);
    // Re-check under the exclusive lock: another thread may have created
    // the allocator between the shared lock being released above and this
    // lock being acquired. Unconditionally re-initializing here would
    // silently replace an allocator that is possibly already in use.
    if (!HasCustomDevice(place, stream)) {
      InitStreamSafeCustomDeviceAllocator(place, stream);
    }
    return custom_device_allocators_[place][stream];
  }
}
#endif

private:
class ZeroSizeAllocator : public Allocator {
public:
Expand Down Expand Up @@ -1008,9 +1053,17 @@ class AllocatorFacadePrivate {
allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}

void InitNaiveBestFitCustomDeviceAllocator(platform::CustomPlace p,
                                           phi::stream::stream_t stream) {
  // Register a naive best-fit allocator for this (place, stream) pair.
  auto naive_allocator = std::make_shared<NaiveBestFitAllocator>(p);
  custom_device_allocators_[p][stream] = std::move(naive_allocator);
}

void InitAutoGrowthCustomDeviceAllocator(platform::CustomPlace p,
bool allow_free_idle_chunk) {
auto chunk_size = FLAGS_auto_growth_chunk_size_in_mb << 20;
VLOG(4) << "FLAGS_auto_growth_chunk_size_in_mb is "
<< FLAGS_auto_growth_chunk_size_in_mb;
auto custom_allocator =
std::make_shared<paddle::memory::allocation::CustomAllocator>(p);
allocators_[p] = std::make_shared<AutoGrowthBestFitAllocator>(
Expand All @@ -1019,6 +1072,40 @@ class AllocatorFacadePrivate {
/*chunk_size=*/chunk_size,
allow_free_idle_chunk);
}

void InitAutoGrowthCustomDeviceAllocator(platform::CustomPlace p,
                                         phi::stream::stream_t stream) {
  // Build an auto-growth best-fit allocator for this (place, stream) pair,
  // layered on top of the raw device allocator.
  VLOG(4) << "FLAGS_auto_growth_chunk_size_in_mb is "
          << FLAGS_auto_growth_chunk_size_in_mb;
  auto chunk_bytes = FLAGS_auto_growth_chunk_size_in_mb << 20;

  auto underlying =
      std::make_shared<paddle::memory::allocation::CustomAllocator>(p);
  // Alignment comes from the device runtime's minimum chunk size.
  auto min_chunk = phi::DeviceManager::GetMinChunkSize(p);
  custom_device_allocators_[p][stream] =
      std::make_shared<AutoGrowthBestFitAllocator>(
          underlying, min_chunk, chunk_bytes, allow_free_idle_chunk_);
}

void WrapStreamSafeCustomDeviceAllocator(platform::CustomPlace p,
                                         phi::stream::stream_t stream) {
  // Swap the registered allocator for a stream-safe wrapper around it;
  // the wrapper keeps the previous allocator as its underlying one.
  auto& slot = custom_device_allocators_[p][stream];
  slot = std::make_shared<StreamSafeCustomDeviceAllocator>(slot, p, stream);
}

void InitStreamSafeCustomDeviceAllocator(platform::CustomPlace p,
                                         phi::stream::stream_t stream) {
  // Create the underlying allocator selected by the current strategy,
  // then wrap it so allocations are tracked per stream.
  VLOG(8) << "Init CustomDevice allocator for stream " << stream
          << " in place " << p;
  if (strategy_ != AllocatorStrategy::kAutoGrowth) {
    InitNaiveBestFitCustomDeviceAllocator(p, stream);
  } else {
    InitAutoGrowthCustomDeviceAllocator(p, stream);
  }
  WrapStreamSafeCustomDeviceAllocator(p, stream);
}
#endif

void InitSystemAllocators() {
Expand Down Expand Up @@ -1161,6 +1248,15 @@ class AllocatorFacadePrivate {
std::shared_timed_mutex xpu_allocator_mutex_;
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
// a standalone custom device allocator to support multi-stream GC in new
// executor
std::map<platform::Place, std::shared_ptr<StreamSafeCustomDeviceAllocator>>
default_stream_safe_custom_device_allocators_;
CustomDeviceAllocatorMap custom_device_allocators_;
std::shared_timed_mutex custom_device_allocator_mutex_;
#endif

AllocatorStrategy strategy_;
AllocatorMap allocators_;
static AllocatorMap zero_size_allocators_;
Expand Down Expand Up @@ -1252,6 +1348,16 @@ std::shared_ptr<phi::Allocation> AllocatorFacade::AllocShared(
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
size_t size,
const phi::Stream& stream) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if (platform::is_custom_place(place)) {
platform::CustomPlace p(place);
phi::stream::stream_t s =
reinterpret_cast<phi::stream::stream_t>(stream.id());
return GetPrivate()
->GetAllocator(p, s, /* create_if_not_found = */ true)
->Allocate(size);
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
AllocatorFacadePrivate* m = GetPrivate();
if (!m->IsStreamSafeCUDAAllocatorUsed()) {
Expand All @@ -1270,8 +1376,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
#elif defined(PADDLE_WITH_XPU)
return GetAllocator(place)->Allocate(size);
#else
PADDLE_THROW(
platform::errors::PreconditionNotMet("Not compiled with GPU or XPU."));
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Not compiled with GPU or XPU or CustomDevice."));
#endif
}

Expand Down Expand Up @@ -1376,6 +1482,19 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) {
#endif
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Returns the allocator to use for custom-device allocations on `place`
// associated with `stream`.
const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
    const platform::Place& place, phi::stream::stream_t stream) {
  AllocatorFacadePrivate* m = GetPrivate();
  // NOTE(review): this condition looks inverted — the per-stream,
  // stream-safe allocator (created lazily) is handed out when
  // FLAGS_use_stream_safe_cuda_allocator is *false*, while enabling the
  // flag falls back to the default place allocator and ignores `stream`.
  // However, Alloc() above unconditionally uses the per-stream path for
  // custom places, so confirm which branch is intended before changing.
  if (!FLAGS_use_stream_safe_cuda_allocator) {
    return m->GetAllocator(place,
                           stream,
                           /*create_if_not_found=*/true);
  }
  // Any non-zero size selects the regular allocators_ map entry (zero-size
  // requests are routed to ZeroSizeAllocator elsewhere).
  return m->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
}
#endif

UNUSED static std::shared_ptr<NaiveBestFitAllocator> unused_obj =
std::make_shared<NaiveBestFitAllocator>(platform::CPUPlace());

Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/memory/allocation/allocator_facade.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/stream.h"

#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/fluid/memory/allocation/custom_allocator.h"
#include "paddle/phi/backends/device_manager.h"
#endif

namespace paddle {
namespace memory {
namespace allocation {
Expand Down Expand Up @@ -91,6 +96,10 @@ class AllocatorFacade {
void RemoveMemoryPoolOfCUDAGraph(int64_t id);
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place,
phi::stream::stream_t stream);
#endif
// TODO(yy): Allocate a Copy-On-Write allocation?
private:
AllocatorFacade();
Expand Down
Loading

0 comments on commit 7022e06

Please sign in to comment.