more build updates:
(1) nccl submodule, cnmem submodule
(2) mpi ops fallback test
(3) a bit more blob interface
(4) fixed tests
(5) caffe2.python.io -> caffe2.python.dataio to avoid name conflicts with Python's built-in io module
(6) In the build system, autogenerate __init__.py instead of having manual
rules just to copy over an empty __init__.py.
Yangqing committed Aug 3, 2016
1 parent b2c2d0b commit 1ede7a7
Showing 29 changed files with 156 additions and 1,629 deletions.
6 changes: 6 additions & 0 deletions .gitmodules
@@ -1,3 +1,9 @@
[submodule "third_party/pybind11"]
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "third_party/nccl"]
path = third_party/nccl
url = https://github.com/nvidia/nccl.git
[submodule "third_party/cnmem"]
path = third_party/cnmem
url = https://github.com/nvidia/cnmem.git
2 changes: 1 addition & 1 deletion Makefile
@@ -16,6 +16,6 @@ lint:
@find caffe2 -type f -exec python brewtool/cpplint.py {} \;

linecount:
-@cloc --read-lang-def=brewtool/caffe.cloc caffe2 pycaffe2 || \
+@cloc --read-lang-def=brewtool/caffe.cloc caffe2 || \
echo "Cloc is not available on the machine. You can install cloc with " && \
echo " sudo apt-get install cloc"
1 change: 1 addition & 0 deletions build.py
@@ -108,6 +108,7 @@ class Config(object):
'arch=compute_30,code=sm_30',
'arch=compute_35,code=sm_35',
'arch=compute_50,code=sm_50',
'arch=compute_61,code=sm_61',
]
# additional CUDA cflags to pass to nvcc.
CUDA_CFLAGS = []
4 changes: 0 additions & 4 deletions caffe/BREW

This file was deleted.

Empty file removed caffe/__init__.py
8 changes: 0 additions & 8 deletions caffe/proto/BREW
@@ -4,11 +4,3 @@ proto_library(
name = 'caffe_proto',
srcs = ['caffe.proto'],
)

-filegroup(
-name = "caffe_proto_py",
-srcs = ["__init__.py"],
-deps = [
-"//caffe:caffe_python",
-]
-)
Empty file removed caffe/proto/__init__.py
9 changes: 3 additions & 6 deletions caffe2/BREW
@@ -26,7 +26,7 @@ cc_library(
deps = [
":core",
":core_gpu_cu",
"//third_party/cnmem:cnmem",
"//third_party:cnmem",
"//third_party:cuda",
],
whole_archive = True,
@@ -48,6 +48,7 @@ cc_test(
excludes=["*gpu_test*"]),
deps = [
":core",
"//caffe2/operators:core_ops",
"//third_party:gtest",
"//caffe2/test:caffe2_gtest_main",
],
@@ -63,11 +64,6 @@ cc_test(
],
)

-filegroup(
-name = "caffe2_python",
-srcs = ["__init__.py"],
-)

cc_library(
name = "all_available_ops",
srcs = [],
@@ -79,6 +75,7 @@ cc_library(
optional_deps = [
"//caffe2/operators:core_ops_gpu",
"//caffe2/operators:core_ops_cudnn",
"//caffe2/contrib/nccl:nccl_ops",
"//caffe2/cuda_rtc:rtc_ops",
"//caffe2/db:db_gpu",
"//caffe2/image:image_ops",
5 changes: 0 additions & 5 deletions caffe2/__init__.py

This file was deleted.

10 changes: 10 additions & 0 deletions caffe2/contrib/nccl/BREW
@@ -0,0 +1,10 @@
cc_library(
name = "nccl_ops",
srcs = Glob(["*.cc"]),
hdrs = Glob(["*.h"]),
deps = [
"//caffe2:core_gpu",
"//third_party:nccl",
],
whole_archive = True,
)
45 changes: 37 additions & 8 deletions caffe2/core/blob.h
@@ -58,6 +58,9 @@ class Blob {
return *static_cast<const T*>(pointer_);
}

const void* GetRaw() const { return pointer_; }
void* GetRaw() { return pointer_; }

/**
* @brief Gets a mutable pointer to the stored object.
*
@@ -73,6 +76,7 @@
return static_cast<T*>(pointer_);
} else {
if (is_new_object) *is_new_object = true;
VLOG(1) << "Create new mutable object " << TypeMeta::Name<T>();
return Reset<T>(new T());
}
}
@@ -87,28 +91,53 @@
*/
template <class T>
T* Reset(T* allocated) {
-if (pointer_) {
-CHECK_NOTNULL(destroy_)(pointer_);
+if (pointer_ && destroy_) {
+destroy_(pointer_);
}
VLOG(1) << "Create new mutable object " << TypeMeta::Name<T>();
meta_ = TypeMeta::Make<T>();
pointer_ = static_cast<void*>(allocated);
destroy_ = &Destroy<T>;
return allocated;
}

/**
* Sets the underlying object to the allocated one. If there is already an
* object in the Blob, the old object is freed first.
*
* Unlike Reset, this does not take ownership of the passed-in pointer: the
* caller must ensure that the allocated object outlives every access made
* through this blob, until another Reset call is made or the blob is
* destructed.
*/
template <class T>
typename std::remove_const<T>::type* ShareExternal(
typename std::remove_const<T>::type* allocated) {
return static_cast<T*>(
ShareExternal(static_cast<void*>(allocated),
TypeMeta::Make<typename std::remove_const<T>::type>()));
}

void* ShareExternal(void* allocated, const TypeMeta& meta) {
if (pointer_ && destroy_) {
destroy_(pointer_);
}
meta_ = meta;
pointer_ = static_cast<void*>(allocated);
destroy_ = nullptr;
return allocated;
}

/**
* Resets the Blob to an empty one.
*/
inline void Reset() {
-if (pointer_) {
-CHECK_NOTNULL(destroy_)(pointer_);
-pointer_ = nullptr;
-meta_ = TypeMeta();
-destroy_ = nullptr;
-}
+if (pointer_ && destroy_) {
+destroy_(pointer_);
+}
+pointer_ = nullptr;
+meta_ = TypeMeta();
+destroy_ = nullptr;
}

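Taken together, the GetRaw accessors, ShareExternal, and the destroy_-aware Reset give a Blob two storage modes: owning (Reset) and non-owning (ShareExternal). A minimal C++ sketch of the contract, based only on the diff above and the tests in blob_test.cc; the Foo type and main harness are illustrative:

```cpp
#include "caffe2/core/blob.h"

using caffe2::Blob;

struct Foo {};  // any default-constructible type works here

int main() {
  Blob blob;

  // Reset(T*) takes ownership: the blob records Destroy<Foo> as its
  // destroy_ callback and frees the object on the next Reset/destruction.
  blob.Reset(new Foo());

  // ShareExternal(T*) does not take ownership: destroy_ is set to
  // nullptr, so `external` must outlive every access through the blob.
  Foo external;
  blob.ShareExternal<Foo>(&external);

  // Reset() only invokes destroy_ when it is non-null, so clearing a
  // blob that shares an external object does not free that object.
  blob.Reset();
  return 0;
}
```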
27 changes: 27 additions & 0 deletions caffe2/core/blob_test.cc
@@ -69,6 +69,32 @@ TEST(BlobTest, BlobWrongType) {
ASSERT_THROW(blob.Get<int>(), EnforceNotMet);
}

TEST(BlobTest, BlobReset) {
Blob blob;
std::unique_ptr<Foo> foo(new Foo());
EXPECT_TRUE(blob.Reset(foo.release()) != nullptr);
// Also test that Reset works.
blob.Reset();
}

TEST(BlobTest, BlobShareExternalPointer) {
Blob blob;
std::unique_ptr<Foo> foo(new Foo());
EXPECT_EQ(blob.ShareExternal<Foo>(foo.get()), foo.get());
EXPECT_TRUE(blob.IsType<Foo>());
// Also test that Reset works.
blob.Reset();
}

TEST(BlobTest, BlobShareExternalObject) {
Blob blob;
Foo foo;
EXPECT_EQ(blob.ShareExternal<Foo>(&foo), &foo);
EXPECT_TRUE(blob.IsType<Foo>());
// Also test that Reset works.
blob.Reset();
}

TEST(BlobTest, StringSerialization) {
const std::string kTestString = "Hello world?";
Blob blob;
@@ -558,6 +584,7 @@ TYPED_TEST(TypedTensorTest, BigTensorSerialization) {
"DUMMY_ENGINE");
Workspace ws;
auto load_op = CreateOperator(op_def, &ws);
EXPECT_TRUE(load_op != nullptr);
LOG(INFO) << "Running operator";

load_op->Run();
3 changes: 3 additions & 0 deletions caffe2/core/context_gpu_test.cc
@@ -1,3 +1,4 @@
#include <chrono>
#include <future>
#include <random>
#include <thread>
@@ -55,6 +56,8 @@ namespace {
void TEST_GetStreamAddress(cudaStream_t* ptr) {
CUDAContext context(0);
*ptr = context.cuda_stream();
// Sleep for a while so we have concurrent thread executions
std::this_thread::sleep_for(std::chrono::seconds(1));
}
} // namespace

6 changes: 5 additions & 1 deletion caffe2/mpi/mpi_ops.h
@@ -35,9 +35,13 @@ class MPIBroadcastOp final : public Operator<Context> {

bool RunOnDevice() override {
MPI_Comm comm = OperatorBase::Input<MPICommonWorldWrapper>(0).comm();
+CAFFE_ENFORCE(OperatorBase::OutputIsType<Tensor<Context>>(0),
+"Output is of wrong type.");
auto* output = Output(0);
// Make sure that output is already allocated.
-CHECK_GT(output->size(), 0);
+CAFFE_ENFORCE(output->size() > 0,
+"Broadcast op uses in-place operation so the output "
+"should be already allocated.");
MPI_CHECK(MPI_Bcast(
output->raw_mutable_data(),
output->nbytes(),
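For context, the enforcement above assumes the broadcast runs in place. A hedged sketch of an OperatorDef that satisfies it — the two-input/one-output shape and the blob names are assumptions drawn from the Input(0) usage and the error message, not spelled out in this diff:

```cpp
#include "caffe2/proto/caffe2.pb.h"

// Output reuses the input blob, so when MPIBroadcastOp::RunOnDevice fires
// the output tensor already exists with nonzero size, as enforced above.
caffe2::OperatorDef MakeBroadcastDef() {
  caffe2::OperatorDef def;
  def.set_type("MPIBroadcast");
  def.add_input("comm_world");  // MPICommonWorldWrapper, read as Input(0)
  def.add_input("X");           // pre-allocated tensor to broadcast
  def.add_output("X");          // in-place: same blob name as the input
  return def;
}
```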
17 changes: 15 additions & 2 deletions caffe2/operators/operator_fallback_gpu.h
@@ -54,8 +54,18 @@ class GPUFallbackOp final : public Operator<CUDAContext> {

bool RunOnDevice() override {
for (int i = 0; i < InputSize(); ++i) {
-local_input_blobs_[i]->template GetMutable<TensorCPU>()->CopyFrom(
-Input(i), &context_);
+if (OperatorBase::InputIsType<TensorCUDA>(i)) {
+local_input_blobs_[i]->template GetMutable<TensorCPU>()->CopyFrom(
+Input(i), &context_);
+} else {
+VLOG(1) << "Input " << i << " is not TensorCUDA. Skipping copy.";
+// Note(jiayq): This removes a const but conceptually
+// local_input_blobs will only be used as const blob input for the
+// base op so we are still fine.
+local_input_blobs_[i]->ShareExternal(
+const_cast<void*>(OperatorBase::Inputs()[i]->GetRaw()),
+OperatorBase::Inputs()[i]->meta());
+}
}
// Sync to make sure copies are done.
context_.FinishDeviceComputation();
@@ -65,6 +75,9 @@
return false;
}
for (int i = 0; i < OutputSize(); ++i) {
+CAFFE_ENFORCE(local_output_blobs_[i]->IsType<TensorCPU>(),
+"GPU fallback op currently does not support non-TensorCPU "
+"output type.");
Output(i)->CopyFrom(
local_output_blobs_[i]->template Get<TensorCPU>(), &context_);
}
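GPUFallbackOp is meant to be registered as the CUDA implementation of an op that only has a CPU kernel. A sketch of the usual registration pattern, assuming a hypothetical CPU-only operator (MyOp/my_op.h are placeholders, not files in this commit):

```cpp
#include "caffe2/core/operator.h"
#include "caffe2/operators/operator_fallback_gpu.h"
#include "caffe2/operators/my_op.h"  // hypothetical CPU-only MyOpCPUImpl

namespace caffe2 {

// TensorCUDA inputs are copied into local CPU blobs; after this change,
// non-tensor inputs are shared by raw pointer (Blob::ShareExternal)
// instead of copied, and non-TensorCPU outputs fail with CAFFE_ENFORCE.
REGISTER_CUDA_OPERATOR(MyOp, GPUFallbackOp<MyOpCPUImpl>);

}  // namespace caffe2
```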
8 changes: 0 additions & 8 deletions caffe2/proto/BREW
@@ -4,11 +4,3 @@ proto_library(
name = 'caffe2_proto',
srcs = Glob(['*.proto']),
)

-filegroup(
-name = "caffe2_proto_py",
-srcs = ["__init__.py"],
-deps = [
-"//caffe2:caffe2_python",
-]
-)
Empty file removed caffe2/proto/__init__.py
4 changes: 2 additions & 2 deletions caffe2/python/BREW
@@ -39,8 +39,8 @@ py_library(
srcs=Glob(["*.py"], excludes=["*_test.py"]),
deps=[
":caffe2_python_cpu",
"//caffe/proto:caffe_proto_py",
"//caffe2/proto:caffe2_proto_py",
"//caffe/proto:caffe_proto",
"//caffe2/proto:caffe2_proto",
"//caffe2/python/mint:mint",
],
optional_deps=[
4 changes: 0 additions & 4 deletions caffe2/python/__init__.py

This file was deleted.

File renamed without changes: caffe2/python/io.py → caffe2/python/dataio.py
2 changes: 1 addition & 1 deletion caffe2/python/dataset.py
@@ -14,7 +14,7 @@
from __future__ import unicode_literals

from caffe2.python import core, workspace
-from caffe2.python.io import Reader, Writer
+from caffe2.python.dataio import Reader, Writer
from caffe2.python.schema import Struct
import numpy as np

1 change: 0 additions & 1 deletion caffe2/python/mint/BREW
@@ -1,7 +1,6 @@
py_library(
name = "mint",
srcs = [
"__init__.py",
"app.py",
"static/css/simple-sidebar.css",
"templates/index.html",
Empty file removed caffe2/python/mint/__init__.py
47 changes: 43 additions & 4 deletions third_party/BREW
@@ -92,10 +92,49 @@ cc_thirdparty_target(
],
)

-cc_thirdparty_target(
-name="cnmen",
-deps=["//third_party/cnmem:cnmem"],
-cc_obj_files = [],
shell_script(
name = "cnmem_header",
srcs = ["cnmem/include/cnmem.h"],
commands=[
"DST=$CAFFE2_GENDIR/third_party/include/",
"mkdir -p $DST",
"cp $CAFFE2_SRCDIR/$CAFFE2_CWD/cnmem/include/cnmem.h $DST/",
],
)

cc_library(
name = "cnmem",
srcs = [
"cnmem/src/cnmem.cpp",
],
deps = [
":cnmem_header",
":cuda",
]
)

shell_script(
name = "nccl_header",
srcs = ["nccl/src/nccl.h"],
commands=[
"DST=$CAFFE2_GENDIR/third_party/include/",
"mkdir -p $DST",
"cp $CAFFE2_SRCDIR/$CAFFE2_CWD/nccl/src/nccl.h $DST/",
],
)

cuda_library(
name = "nccl",
srcs = Glob(["nccl/src/*.cu"]),
deps = [
":nccl_header",
":cuda",
],
compiler_flags=[
"-Wno-switch", # NCCL does not follow strict switch enum check.
"-DNCCL_MAJOR=1 -DNCCL_MINOR=2 -DNCCL_PATCH=3",
"-DCUDA_MAJOR=__CUDACC_VER_MAJOR__ -DCUDA_MINOR=__CUDACC_VER_MINOR__",
],
)

###############################################################################
1 change: 1 addition & 0 deletions third_party/cnmem
Submodule cnmem added at 28a182