Skip to content

Commit

Permalink
Merge pull request #1 from borisfom/caffe-0.14-cnmem
Browse files Browse the repository at this point in the history
Caffe 0.14 cnmem
  • Loading branch information
borisfom committed Oct 9, 2015
2 parents 848bfda + 36eaec8 commit 636998d
Show file tree
Hide file tree
Showing 13 changed files with 263 additions and 313 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "cnmem"]
path = cnmem
url = https://github.com/NVIDIA/cnmem.git
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -301,12 +301,16 @@ endif
# cuDNN acceleration configuration.
ifeq ($(USE_CUDNN), 1)
LIBRARIES += cudnn
INCLUDE_DIRS += ${CUDNN_DIR}/include
LIBRARY_DIRS += ${CUDNN_DIR}/install/cuda/lib64
COMMON_FLAGS += -DUSE_CUDNN
endif

# CNMEM memory-pool integration (NVIDIA cnmem git submodule).
# Assumes the library was built in ${CNMEM_DIR}/build — confirm against
# the submodule's own build instructions.
ifeq ($(USE_CNMEM), 1)
LIBRARIES += cnmem
LIBRARY_DIRS += ${CNMEM_DIR}/build
INCLUDE_DIRS += ${CNMEM_DIR}/include
COMMON_FLAGS += -DUSE_CNMEM
endif

Expand Down
1 change: 1 addition & 0 deletions cnmem
Submodule cnmem added at e817a7
61 changes: 61 additions & 0 deletions include/caffe/CuMem.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#ifndef CAFFE_CUMEM_HPP_
#define CAFFE_CUMEM_HPP_

#include "common.hpp"

#ifdef USE_CNMEM
// CNMEM integration
#include <cnmem.h>
#endif

namespace caffe {

// GPU memory facade: routes device allocations either to the CNMEM pool
// or to plain cudaMalloc/cudaFree. All state is static; it is configured
// once per process by CuMemActivator (the only caller of init()/destroy()).
class CuMem {
public:
#ifndef CPU_ONLY
// Allocate `size` bytes of device memory into *ptr.
// `stream` is only meaningful when the CNMEM pool is in use.
static void mallocGPU(void **ptr, size_t size,
cudaStream_t stream = cudaStreamDefault);
// Release memory previously obtained from mallocGPU.
static void freeGPU(void *ptr, cudaStream_t = cudaStreamDefault);
// Make `stream` known to the CNMEM pool (no-op without the pool).
static void registerStream(cudaStream_t stream);
#endif

// True when allocations are being served by the CNMEM pool.
static bool usingPool() {
return using_pool_;
}

// Query free/used device memory counters.
static void getInfo(size_t *free_mem, size_t *used_mem);

private:
// Initialization/teardown are private: only CuMemActivator may call them.
static void init(const std::vector<int>& gpus_, bool use_pool=true);
static void destroy();

friend class CuMemActivator;
static bool using_pool_;   // allocations go through CNMEM when true
static bool initialized_;  // init() has run and destroy() has not


};

// RAII helper: initializes the CuMem allocator for a set of GPUs on
// construction and finalizes the CNMEM pool on destruction.
class CuMemActivator {
public:
// gpus: device ids to set up. An empty list performs no initialization.
// The pool is only enabled when CNMEM support is compiled in (USE_CNMEM);
// otherwise CuMem::init() is called with the pool disabled.
explicit CuMemActivator(const std::vector<int>& gpus)
    : using_pool_(false) {
  if (gpus.size() > 0) {
#ifdef USE_CNMEM
    using_pool_ = true;
#endif
    CuMem::init(gpus, using_pool_);
  }
}
// NOTE(review): when the pool was not enabled, CuMem::destroy() is never
// invoked even though init() ran — confirm that is intentional.
~CuMemActivator() {
  if (using_pool_) {
    CuMem::destroy();
  }
}
private:
// Fixed: was declared `int` but is initialized with `false` and used
// exclusively as a boolean flag.
bool using_pool_;
};

} // namespace caffe

# endif
73 changes: 7 additions & 66 deletions include/caffe/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@

#include "caffe/util/device_alternate.hpp"

#ifdef USE_CNMEM
// cuMEM integration
#include <cnmem.h>
#endif

// gflags 2.1 issue: namespace google was changed to gflags without warning.
// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
// 2.1. If yes, we will add a temporary solution to redirect the namespace.
Expand Down Expand Up @@ -70,6 +65,12 @@ private:\
// is executed we will see a fatal log.
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"

#include "CuMem.hpp"

// bfomitchev: temporary, for better merge
#define MemoryHandler CuMem
#define MemoryHandlerActivator CuMemActivator

// See PR #1236
namespace cv { class Mat; }

Expand Down Expand Up @@ -184,67 +185,7 @@ class Caffe {
DISABLE_COPY_AND_ASSIGN(Caffe);
};

// Singleton GPU memory manager (replaced by CuMem in this change; these
// lines are the removed implementation). Dispatches allocations to the
// CNMEM pool when enabled, else to plain CUDA calls.
class MemoryHandler {
public:
// Access the process-wide singleton instance.
static MemoryHandler& Get();
#ifndef CPU_ONLY
// Allocate `size` bytes of device memory into *ptr; `stream` is only
// meaningful when the CNMEM pool is active.
static void mallocGPU(void **ptr, size_t size,
cudaStream_t stream = cudaStreamDefault);
// Release memory previously obtained from mallocGPU.
static void freeGPU(void *ptr, cudaStream_t = cudaStreamDefault);
// Make `stream` known to the CNMEM pool.
static void registerStream(cudaStream_t stream);
#endif
// Record the device ids the pool should manage.
static void setGPUs(const std::vector<int>& gpus) { Get().gpus_ = gpus; }
// Request that subsequent allocations go through the CNMEM pool.
static void usePool() { Get().using_pool_ = true; }
// Pool status; compiled to a constant false when CNMEM is not built in.
static bool usingPool() {
#ifdef USE_CNMEM
return Get().using_pool_;
#else
return false;
#endif
}
// Query free/used device memory counters.
static void getInfo(size_t *free_mem, size_t *used_mem);
static void destroy();
~MemoryHandler() { }

private:
// Private ctor: instances only exist via Get().
MemoryHandler() : using_pool_(false), initialized_(false) {}
static void Init();
// static void Destroy();
#ifndef CPU_ONLY
// Internal worker routines behind the static mallocGPU/freeGPU facade.
void allocate_memory(void **ptr, size_t size, cudaStream_t stream);
void free_memory(void *ptr, cudaStream_t stream);
#endif
DISABLE_COPY_AND_ASSIGN(MemoryHandler);

bool using_pool_;   // allocations go through CNMEM when true
bool initialized_;  // lazy Init() has run
std::vector<int> gpus_;  // devices the pool manages
};

// RAII activator for MemoryHandler (removed in this change in favor of
// CuMemActivator): enables the pool on construction, destroys it on exit.
class MemoryHandlerActivator {
public:
explicit MemoryHandlerActivator(const std::vector<int>& gpus)
: using_pool_(false) {
if (gpus.size() > 0) {
using_pool_ = true;
MemoryHandler::usePool();
MemoryHandler::setGPUs(gpus);
#ifndef CPU_ONLY
// Dummy 4-byte allocation to force lazy pool initialization up front.
void* temp;
MemoryHandler::mallocGPU(&temp, 4);
MemoryHandler::freeGPU(temp);
#endif
}
}
~MemoryHandlerActivator() {
if (using_pool_) {
MemoryHandler::destroy();
}
}
private:
// NOTE(review): declared int but used strictly as a boolean flag.
int using_pool_;
};

} // namespace caffe

#endif // CAFFE_COMMON_HPP_

3 changes: 2 additions & 1 deletion include/caffe/util/device_alternate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
CHECK_EQ(status, CNMEM_STATUS_SUCCESS) << " " \
<< cnmemGetErrorString(status); \
} while (0)

#else
#define CNMEM_CHECK(condition)
#endif

// CUDA: grid stride looping
Expand Down
98 changes: 98 additions & 0 deletions src/caffe/CuMem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#include "caffe/common.hpp"
#include "caffe/CuMem.hpp"

#include <boost/thread.hpp>

namespace caffe {

// Definitions for the static flags declared in CuMem.hpp.
bool CuMem::using_pool_ = false;
bool CuMem::initialized_ = false;

// NOTE(review): no unqualified boost names appear in this file; this
// file-scope using-directive looks unnecessary — confirm and remove.
using namespace boost;

// Fallback so the CNMEM_CHECK call sites below compile in builds where
// device_alternate.hpp did not define the macro (no CNMEM support).
#ifndef CNMEM_CHECK
# define CNMEM_CHECK(x)
#endif

#ifndef CPU_ONLY // CPU-only Caffe.

// Allocate `size` bytes of device memory into *ptr, drawing from the
// CNMEM pool when it is active and from cudaMalloc otherwise.
// `stream` is forwarded to CNMEM only; the plain path ignores it.
void CuMem::mallocGPU(void **ptr, size_t size, cudaStream_t stream) {
  CHECK(initialized_);
  if (!using_pool_) {
    CUDA_CHECK(cudaMalloc(ptr, size));
  } else {
    CNMEM_CHECK(cnmemMalloc(ptr, size, stream));
  }
}


// Return device memory obtained from mallocGPU, using whichever backend
// (CNMEM pool or cudaFree) is currently active.
void CuMem::freeGPU(void *ptr, cudaStream_t stream) {
  CHECK(initialized_);
  if (!using_pool_) {
    CUDA_CHECK(cudaFree(ptr));
  } else {
    CNMEM_CHECK(cnmemFree(ptr, stream));
  }
}

// Register `stream` with the CNMEM pool; a no-op when the plain CUDA
// allocator is in use.
void CuMem::registerStream(cudaStream_t stream) {
  CHECK(initialized_);
  if (!using_pool_) {
    return;  // nothing to register for the plain allocator
  }
  CNMEM_CHECK(cnmemRegisterStream(stream));
}

// Tear down the allocator and reset all static state.
// Fixed: cnmemFinalize() was previously invoked unconditionally; in a
// USE_CNMEM build running with the pool disabled that finalizes an
// uninitialized CNMEM, and the CHECK inside CNMEM_CHECK would abort.
// Guard on using_pool_, consistent with every other method in this file.
void CuMem::destroy() {
  CHECK(initialized_);
  if (using_pool_) {
    CNMEM_CHECK(cnmemFinalize());
  }
  initialized_ = false;
  using_pool_ = false;
}

// One-time allocator setup for the given GPU list.
// NOTE(review): the CNMEM pool path below is hard-disabled via
// `if (false /* use_pool */)`, so the `use_pool` argument is currently
// ignored and the pool is never created — presumably a temporary measure
// for this merge; confirm before relying on pool behavior.
void CuMem::init(const std::vector<int>& gpus, bool use_pool) {
CHECK(!initialized_);
#ifdef USE_CNMEM
if (false /* use_pool */) {
using_pool_ = true;
cnmemDevice_t *devs = new cnmemDevice_t[gpus.size()];

// Remember the current device so it can be restored afterwards.
int initial_device;
CUDA_CHECK(cudaGetDevice(&initial_device));

for (int i = 0; i < gpus.size(); i++) {
CUDA_CHECK(cudaSetDevice(gpus[i]));

devs[i].device = gpus[i];

size_t free_mem, used_mem;
CUDA_CHECK(cudaMemGetInfo(&free_mem, &used_mem));

// Hand 95% of the currently free memory on each device to the pool.
devs[i].size = size_t(0.95*free_mem);
devs[i].numStreams = 0;
devs[i].streams = NULL;
}
CNMEM_CHECK(cnmemInit(gpus.size(), devs, CNMEM_FLAGS_DEFAULT));
initialized_ = true;

CUDA_CHECK(cudaSetDevice(initial_device));

delete [] devs;
}
#endif
// Reached on every path; note initialized_ is also set inside the pool
// branch above, so that earlier assignment is redundant when it runs.
initialized_ = true;
std::cout << "CuMem initialized" <<
(using_pool_ ? " with CNMEM pool.\n" : " with CUDA allocator.\n");
}

// Report device memory statistics: from the CNMEM pool when it is active,
// otherwise directly from the CUDA runtime.
void CuMem::getInfo(size_t *free_mem, size_t *total_mem) {
  if (using_pool_) {
    CNMEM_CHECK(cnmemMemGetInfo(free_mem, total_mem, cudaStreamDefault));
  } else {
    CUDA_CHECK(cudaMemGetInfo(free_mem, total_mem));
  }
}

// Fixed: the #endif previously came AFTER the namespace-closing brace, so
// a CPU_ONLY build (which strips everything inside #ifndef CPU_ONLY)
// also stripped the brace and left `namespace caffe` unclosed — a
// guaranteed compile error. Close the preprocessor block first.
#endif  // CPU_ONLY

}  // namespace caffe

Loading

0 comments on commit 636998d

Please sign in to comment.