forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 262
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from borisfom/caffe-0.14-cnmem
Caffe 0.14 cnmem
- Loading branch information
Showing
13 changed files
with
263 additions
and
313 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "cnmem"] | ||
path = cnmem | ||
url = https://github.com/NVIDIA/cnmem.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#ifndef CAFFE_CUMEM_HPP_ | ||
#define CAFFE_CUMEM_HPP_ | ||
|
||
#include "common.hpp" | ||
|
||
#ifdef USE_CNMEM | ||
// CNMEM integration | ||
#include <cnmem.h> | ||
#endif | ||
|
||
namespace caffe { | ||
|
||
class CuMem { | ||
public: | ||
#ifndef CPU_ONLY | ||
static void mallocGPU(void **ptr, size_t size, | ||
cudaStream_t stream = cudaStreamDefault); | ||
static void freeGPU(void *ptr, cudaStream_t = cudaStreamDefault); | ||
static void registerStream(cudaStream_t stream); | ||
#endif | ||
|
||
static bool usingPool() { | ||
return using_pool_; | ||
} | ||
|
||
static void getInfo(size_t *free_mem, size_t *used_mem); | ||
|
||
private: | ||
static void init(const std::vector<int>& gpus_, bool use_pool=true); | ||
static void destroy(); | ||
|
||
friend class CuMemActivator; | ||
static bool using_pool_; | ||
static bool initialized_; | ||
|
||
|
||
}; | ||
|
||
class CuMemActivator { | ||
public: | ||
explicit CuMemActivator(const std::vector<int>& gpus) | ||
: using_pool_(false) { | ||
if (gpus.size() > 0) { | ||
#ifdef USE_CNMEM | ||
using_pool_ = true; | ||
#endif | ||
CuMem::init(gpus, using_pool_); | ||
} | ||
} | ||
~CuMemActivator() { | ||
if (using_pool_) { | ||
CuMem::destroy(); | ||
} | ||
} | ||
private: | ||
int using_pool_; | ||
}; | ||
|
||
} // namespace caffe | ||
|
||
# endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#include "caffe/common.hpp" | ||
#include "caffe/CuMem.hpp" | ||
|
||
#include <boost/thread.hpp> | ||
|
||
namespace caffe { | ||
|
||
bool CuMem::using_pool_ = false; | ||
bool CuMem::initialized_ = false; | ||
|
||
using namespace boost; | ||
|
||
#ifndef CNMEM_CHECK | ||
# define CNMEM_CHECK(x) | ||
#endif | ||
|
||
#ifndef CPU_ONLY // CPU-only Caffe. | ||
|
||
void CuMem::mallocGPU(void **ptr, size_t size, cudaStream_t stream) { | ||
CHECK(initialized_); | ||
if (using_pool_) { | ||
CNMEM_CHECK(cnmemMalloc(ptr, size, stream)); | ||
} else { | ||
CUDA_CHECK(cudaMalloc(ptr, size)); | ||
} | ||
} | ||
|
||
|
||
void CuMem::freeGPU(void *ptr, cudaStream_t stream) { | ||
CHECK(initialized_); | ||
if (using_pool_) { | ||
CNMEM_CHECK(cnmemFree(ptr, stream)); | ||
} else { | ||
CUDA_CHECK(cudaFree(ptr)); | ||
} | ||
} | ||
|
||
// Registers `stream` with the CNMEM pool. CHECK-fails unless CuMem has been
// initialized; when the pool is not in use there is nothing to register.
void CuMem::registerStream(cudaStream_t stream) {
  CHECK(initialized_);
  if (!using_pool_) {
    return;
  }
  CNMEM_CHECK(cnmemRegisterStream(stream));
}
|
||
// Tears down the state established by init() and resets both flags so that
// init() may be called again. CHECK-fails if CuMem is not initialized.
void CuMem::destroy() {
  CHECK(initialized_);
  // Finalize CNMEM only if the pool was actually brought up. On the plain
  // CUDA path cnmemInit() is never called, so there is nothing to finalize.
  if (using_pool_) {
    CNMEM_CHECK(cnmemFinalize());
  }
  initialized_ = false;
  using_pool_ = false;
}
|
||
// One-time setup of the allocator. CHECK-fails if already initialized.
//
// gpus:     device ids a CNMEM pool would be created on.
// use_pool: request for the CNMEM pool; currently ignored (see NOTE below).
//
// Marks CuMem as initialized and prints which allocator is active.
void CuMem::init(const std::vector<int>& gpus, bool use_pool) {
  CHECK(!initialized_);
#ifdef USE_CNMEM
  // NOTE(review): the pool path is hard-disabled here (`false` replaces
  // `use_pool`), so the CUDA allocator is always used even in USE_CNMEM
  // builds. That switch is kept as found; confirm before re-enabling.
  if (false /* use_pool */) {
    // Remember the current device so it can be restored after probing.
    int initial_device;
    CUDA_CHECK(cudaGetDevice(&initial_device));

    // A vector owns the descriptor array; no manual delete[] is needed.
    std::vector<cnmemDevice_t> devs(gpus.size());
    for (size_t i = 0; i < gpus.size(); ++i) {
      CUDA_CHECK(cudaSetDevice(gpus[i]));

      size_t free_mem, total_mem;
      CUDA_CHECK(cudaMemGetInfo(&free_mem, &total_mem));

      devs[i].device = gpus[i];
      // Size the pool at 95% of the memory currently free on the device.
      devs[i].size = size_t(0.95 * free_mem);
      devs[i].numStreams = 0;
      devs[i].streams = NULL;
    }
    CNMEM_CHECK(cnmemInit(static_cast<int>(devs.size()),
                          devs.empty() ? NULL : &devs[0],
                          CNMEM_FLAGS_DEFAULT));
    using_pool_ = true;

    CUDA_CHECK(cudaSetDevice(initial_device));
  }
#endif
  initialized_ = true;
  std::cout << "CuMem initialized" <<
    (using_pool_ ? " with CNMEM pool.\n" : " with CUDA allocator.\n");
}
|
||
void CuMem::getInfo(size_t *free_mem, size_t *total_mem) { | ||
if (using_pool_) { | ||
CNMEM_CHECK(cnmemMemGetInfo(free_mem, total_mem, cudaStreamDefault)); | ||
} else { | ||
CUDA_CHECK(cudaMemGetInfo(free_mem, total_mem)); | ||
} | ||
} | ||
|
||
#endif  // CPU_ONLY

// The namespace is opened outside the CPU_ONLY guard, so it must be closed
// outside it too; otherwise a CPU_ONLY build leaves `namespace caffe` open.
}  // namespace caffe
|
Oops, something went wrong.