Skip to content

Commit

Permalink
use spin lock in auto growth allocator (#34910)
Browse files Browse the repository at this point in the history
* use spin lock in auto growth allocator, test=develop

* use pthread spin lock, test=develop

* use lock guard, test=develop

* use malloc spin lock, test=develop

* use lock_guard, test=develop
  • Loading branch information
wanghuancoder authored Aug 20, 2021
1 parent 4416c79 commit 6bacfb0
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
size = AlignedSize(size, alignment_);

std::lock_guard<std::mutex> guard(mtx_);
std::lock_guard<SpinLock> guard(spinlock_);
auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
BlockIt block_it;
if (iter != free_blocks_.end()) {
Expand Down Expand Up @@ -94,11 +94,12 @@ Allocation *AutoGrowthBestFitAllocator::AllocateImpl(size_t size) {
VLOG(2) << "Not found and reallocate " << realloc_size << ", and remaining "
<< remaining_size;
}

return new BlockAllocation(block_it);
}

void AutoGrowthBestFitAllocator::FreeImpl(Allocation *allocation) {
std::lock_guard<std::mutex> guard(mtx_);
std::lock_guard<SpinLock> guard(spinlock_);
auto block_it = static_cast<BlockAllocation *>(allocation)->block_it_;
auto &blocks = block_it->chunk_->blocks_;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <utility>

#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/allocation/spin_lock.h"

namespace paddle {
namespace memory {
Expand Down Expand Up @@ -86,7 +87,7 @@ class AutoGrowthBestFitAllocator : public Allocator {
size_t alignment_;
size_t chunk_size_;

mutable std::mutex mtx_;
SpinLock spinlock_;
};

} // namespace allocation
Expand Down
36 changes: 36 additions & 0 deletions paddle/fluid/memory/allocation/spin_lock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/memory/allocation/spin_lock_c.h"

namespace paddle {
namespace memory {

class SpinLock {
public:
SpinLock() { INITIAL_LOCK(&mlock_); }

void lock() { ACQUIRE_LOCK(&mlock_); }

void unlock() { RELEASE_LOCK(&mlock_); }
DISABLE_COPY_AND_ASSIGN(SpinLock);

private:
MLOCK_T mlock_;
};

} // namespace memory
} // namespace paddle
239 changes: 239 additions & 0 deletions paddle/fluid/memory/allocation/spin_lock_c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
The spinlock implementation is borrowed from Doug Lea's malloc, which was
released to the public domain, as explained at
http://creativecommons.org/licenses/publicdomain. Send questions,
comments, complaints, performance data, etc. to dl@cs.oswego.edu
*/

#pragma once

#include <assert.h>

#ifndef WIN32
#include <pthread.h>
#if defined(__SVR4) && defined(__sun) /* solaris */
#include <thread.h>
#endif /* solaris */
#else
/*
 * Win32 only: declare the interlocked compiler intrinsics by hand (with C
 * linkage when compiled as C++) unless building for AMD64, where they are
 * already declared.
 */
#ifndef _M_AMD64
/* These are already defined on AMD64 builds */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
LONG __cdecl _InterlockedCompareExchange(LONG volatile* Dest, LONG Exchange,
LONG Comp);
LONG __cdecl _InterlockedExchange(LONG volatile* Target, LONG Value);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* _M_AMD64 */
/* Ask the compiler to expand both operations inline as intrinsics. */
#pragma intrinsic(_InterlockedCompareExchange)
#pragma intrinsic(_InterlockedExchange)
/*
 * Lower-case aliases. interlockedexchange is what the win32_* lock routines
 * in this header call; interlockedcompareexchange is not referenced by the
 * code visible in this header.
 */
#define interlockedcompareexchange _InterlockedCompareExchange
#define interlockedexchange _InterlockedExchange
#endif /* Win32 */

/*
 * FORCEINLINE: strongest "always inline" spelling the compiler offers.
 * Falls back to plain `inline` for other C++ compilers and to nothing for
 * other C compilers. A definition supplied by the includer is respected.
 */
#if !defined(FORCEINLINE)
#if defined(__GNUC__)
#define FORCEINLINE __inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define FORCEINLINE __forceinline
#elif defined(__cplusplus)
#define FORCEINLINE inline
#else
#define FORCEINLINE
#endif
#endif /* !FORCEINLINE */

/*
 * NOINLINE: asks the compiler not to inline a function; expands to nothing
 * on compilers without a known spelling.
 */
#if !defined(NOINLINE)
#if defined(__GNUC__)
#define NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#define NOINLINE __declspec(noinline)
#else
#define NOINLINE
#endif
#endif /* !NOINLINE */

#ifndef WIN32

/* Custom pthread-style spin locks on x86 and x64 for gcc */
/*
 * State of one re-entrant spin lock (non-Windows build).
 */
struct pthread_mlock_t {
/* Lock word: 0 when free; set to 1 by the acquiring compare-and-swap. */
volatile unsigned int l;
/* Hold count of the owning thread; set to 1 on acquire, incremented on */
/* re-entry, decremented on release. */
unsigned int c;
/* Thread that currently owns the lock; reset to 0 when fully released. */
pthread_t threadid;
};
/* Platform-neutral names used by the rest of the code for this lock type. */
#define MLOCK_T struct pthread_mlock_t
#define CURRENT_THREAD pthread_self()
/* Resets owner, lock word and hold count to zero; the expression yields 0. */
#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl)
#define RELEASE_LOCK(sl) pthread_release_lock(sl)
#define TRY_LOCK(sl) pthread_try_lock(sl)
/* Bit mask applied to the spin counter: the acquiring thread yields each */
/* time (counter & SPINS_PER_YIELD) becomes 0. */
#define SPINS_PER_YIELD 63

// static MLOCK_T malloc_global_mutex = { 0, 0, 0};

/*
 * Acquires `sl`, spinning until it becomes available. Always returns 0.
 *
 * Re-entrant: when the lock is held and the calling thread is the recorded
 * owner, the hold count is incremented and the call returns immediately.
 * Each time the spin counter masked with SPINS_PER_YIELD reaches zero the
 * thread yields the CPU, on platforms where a yield call is known.
 */
static FORCEINLINE int pthread_acquire_lock(MLOCK_T* sl) {
  int spins = 0;
  volatile unsigned int* lp = &sl->l;
  for (;;) {
    if (*lp != 0) {
      /* Held by someone: succeed only if that someone is this thread. */
      if (sl->threadid == CURRENT_THREAD) {
        ++sl->c;
        return 0;
      }
    } else {
      /*
       * The word looked free: try to flip it 0 -> 1 atomically. The compiler
       * builtin is used instead of hand-written `lock; cmpxchgl` so that the
       * code also builds on non-x86 targets; sequentially consistent ordering
       * keeps the full-barrier behaviour of the locked instruction.
       */
      unsigned int expected = 0U;
      if (__atomic_compare_exchange_n(lp, &expected, 1U, 0 /* strong */,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
        assert(!sl->threadid);
        sl->threadid = CURRENT_THREAD;
        sl->c = 1;
        return 0;
      }
    }
    if ((++spins & SPINS_PER_YIELD) == 0) {
#if defined(__SVR4) && defined(__sun) /* solaris */
      thr_yield();
#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
      sched_yield();
#endif /* unknown systems: no yield call, keep spinning */
    }
  }
}

/*
 * Releases one level of ownership of `sl`.
 *
 * Must be called by the thread that holds the lock (checked with assert).
 * The lock word is only cleared once the hold count drops to zero, i.e. when
 * the outermost acquisition is released.
 */
static FORCEINLINE void pthread_release_lock(MLOCK_T* sl) {
  volatile unsigned int* lp = &sl->l;
  assert(*lp != 0);
  assert(sl->threadid == CURRENT_THREAD);
  if (--sl->c == 0) {
    /* Drop ownership before publishing the lock as free. */
    sl->threadid = 0;
    /*
     * Sequentially consistent store in place of the x86-only `lock; xchgl`:
     * same full-barrier semantics, but portable to other architectures.
     */
    __atomic_store_n(lp, 0U, __ATOMIC_SEQ_CST);
  }
}

/*
 * Makes a single attempt to acquire `sl` without spinning.
 *
 * Returns 1 if the lock was acquired, or was already owned by the calling
 * thread (in which case the hold count is incremented); returns 0 otherwise.
 */
static FORCEINLINE int pthread_try_lock(MLOCK_T* sl) {
  volatile unsigned int* lp = &sl->l;
  if (*lp != 0) {
    /* Held by someone: succeed only if that someone is this thread. */
    if (sl->threadid == CURRENT_THREAD) {
      ++sl->c;
      return 1;
    }
  } else {
    /*
     * Atomic 0 -> 1 transition via compiler builtin rather than x86-only
     * `lock; cmpxchgl`, so the code builds on non-x86 targets as well.
     */
    unsigned int expected = 0U;
    if (__atomic_compare_exchange_n(lp, &expected, 1U, 0 /* strong */,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
      assert(!sl->threadid);
      sl->threadid = CURRENT_THREAD;
      sl->c = 1;
      return 1;
    }
  }
  return 0;
}

#else /* WIN32 */
/* Custom win32-style spin locks on x86 and x64 for MSC */
/*
 * State of one re-entrant spin lock (Windows build).
 */
struct win32_mlock_t {
/* Lock word: 0 when free; set to 1 by the acquiring interlocked exchange. */
volatile long l; // NOLINT
/* Hold count of the owning thread; set to 1 on acquire, incremented on */
/* re-entry, decremented on release. */
unsigned int c;
/* Id of the owning thread; reset to 0 when fully released. */
long threadid; // NOLINT
};

/* Platform-neutral names used by the rest of the code for this lock type. */
#define MLOCK_T struct win32_mlock_t
#define CURRENT_THREAD GetCurrentThreadId()
/* Resets owner, lock word and hold count to zero; the expression yields 0. */
#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl)
#define RELEASE_LOCK(sl) win32_release_lock(sl)
#define TRY_LOCK(sl) win32_try_lock(sl)
/* Bit mask applied to the spin counter: the acquiring thread calls */
/* SleepEx(0, FALSE) each time (counter & SPINS_PER_YIELD) becomes 0. */
#define SPINS_PER_YIELD 63

// static MLOCK_T malloc_global_mutex = { 0, 0, 0};

/*
 * Acquires `sl`, spinning until it becomes available. Always returns 0.
 *
 * If the lock is held and the calling thread is the recorded owner, the hold
 * count is bumped instead of spinning. Each time the attempt counter masked
 * with SPINS_PER_YIELD reaches zero, SleepEx(0, FALSE) is called.
 */
static FORCEINLINE int win32_acquire_lock(MLOCK_T *sl) {
  int attempts = 0;
  for (;;) {
    if (sl->l == 0) {
      /* Looked free: the exchange returns the previous value, 0 == we won. */
      if (interlockedexchange(&sl->l, 1) == 0) {
        assert(!sl->threadid);
        sl->threadid = CURRENT_THREAD;
        sl->c = 1;
        return 0;
      }
    } else if (sl->threadid == CURRENT_THREAD) {
      /* Already ours: nested acquisition. */
      ++sl->c;
      return 0;
    }
    ++attempts;
    if ((attempts & SPINS_PER_YIELD) == 0) {
      SleepEx(0, FALSE);
    }
  }
}

/*
 * Releases one level of ownership of `sl`.
 *
 * The caller must be the owning thread (checked with assert). The lock word
 * is cleared only when the hold count reaches zero.
 */
static FORCEINLINE void win32_release_lock(MLOCK_T *sl) {
  assert(sl->threadid == CURRENT_THREAD);
  assert(sl->l != 0);
  --sl->c;
  if (sl->c != 0) {
    return; /* still held by an outer acquisition */
  }
  sl->threadid = 0;
  interlockedexchange(&sl->l, 0);
}

/*
 * Makes a single attempt to acquire `sl` without spinning.
 *
 * Returns 1 when the lock was taken, or was already owned by the calling
 * thread (the hold count is then incremented); returns 0 otherwise.
 */
static FORCEINLINE int win32_try_lock(MLOCK_T *sl) {
  int acquired = 0;
  if (sl->l == 0) {
    /* Looked free: the exchange returns the previous value, 0 == we won. */
    if (interlockedexchange(&sl->l, 1) == 0) {
      assert(!sl->threadid);
      sl->threadid = CURRENT_THREAD;
      sl->c = 1;
      acquired = 1;
    }
  } else if (sl->threadid == CURRENT_THREAD) {
    /* Already ours: nested acquisition. */
    ++sl->c;
    acquired = 1;
  }
  return acquired;
}

#endif /* WIN32 */

0 comments on commit 6bacfb0

Please sign in to comment.