Skip to content

Commit

Permalink
[Caffe] Optional NNPACK integration
Browse files Browse the repository at this point in the history
The approach is pretty straightforward and very similar to the CuDNN integration:

- Add some CMake magic to detect NNPACK if it's installed on the user's
  system.
- Add some Makefile.config options
- Add some thread-local state (a threadpool for NNPACK) to caffe::Caffe.
- Add an NNPACK `Engine` parameter to Convolution/Pooling/InnerProduct
  layers.
- In LayerFactory, ifdef around whether NNPACK is enabled and dispatch
  to NNPACK*Layer or *Layer depending on the Engine parameter.
- In NNPACK*Layer, ensure the NNPACK preconditions hold and call the
  appropriate nnp_* function, otherwise fall back to *Layer.

I'm not sure how common it is for Caffe users to run CNN inference on
Haswell and newer CPUs, but if it is, this might be a nice boost for
them.
  • Loading branch information
ajtulloch committed Jun 2, 2016
1 parent 4f7b90d commit 05151a4
Show file tree
Hide file tree
Showing 21 changed files with 892 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ include(cmake/ConfigGen.cmake)
# ---[ Options
caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA
caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY)
caffe_option(USE_NNPACK "Build Caffe with NNPACK library support" OFF)
caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
caffe_option(BUILD_python "Build Python wrapper" ON)
set(python_version "2" CACHE STRING "Specify which Python version to use")
Expand Down
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,15 @@ ifeq ($(USE_CUDNN), 1)
COMMON_FLAGS += -DUSE_CUDNN
endif

# NNPACK acceleration configuration.
# When USE_NNPACK is 1: link against the nnpack library, define USE_NNPACK
# for the compiler, and add the NNPACK headers plus the pthreadpool headers
# (looked up relative to NNPACK_INCLUDE) to the include path.
# NNPACK_INCLUDE and NNPACK_LIB are the paths shown in Makefile.config.example.
ifeq ($(USE_NNPACK), 1)
LIBRARIES += nnpack
COMMON_FLAGS += -DUSE_NNPACK
INCLUDE_DIRS += $(NNPACK_INCLUDE)
# pthreadpool headers: assumes NNPACK_INCLUDE points inside an NNPACK source
# tree that contains third-party/pthreadpool -- TODO confirm.
INCLUDE_DIRS += $(NNPACK_INCLUDE)/../third-party/pthreadpool/include
LIBRARY_DIRS += $(NNPACK_LIB)
endif

# configure IO libraries
ifeq ($(USE_OPENCV), 1)
COMMON_FLAGS += -DUSE_OPENCV
Expand Down
5 changes: 5 additions & 0 deletions Makefile.config.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
# possibility of simultaneous read and write
# ALLOW_LMDB_NOLOCK := 1

# NNPACK acceleration switch (uncomment to build with NNPACK).
# USE_NNPACK := 1
# NNPACK_INCLUDE := /path/to/NNPACK/include
# NNPACK_LIB := /path/to/NNPACK/lib

# Uncomment if you're using OpenCV 3
# OPENCV_VERSION := 3

Expand Down
8 changes: 8 additions & 0 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@ if(NOT HAVE_CUDA)
add_definitions(-DCPU_ONLY)
endif()

# ---[ NNPACK
# Optional dependency: only configured when USE_NNPACK is enabled.
if(USE_NNPACK)
# REQUIRED: configuration is expected to stop if NNPACK cannot be located.
find_package(NNPACK REQUIRED)
include_directories(SYSTEM ${NNPACK_INCLUDE_DIR})
# pthreadpool headers are looked up relative to the NNPACK include directory
# (assumes an NNPACK source-tree layout with third-party/pthreadpool --
# TODO confirm).
include_directories(SYSTEM ${NNPACK_INCLUDE_DIR}/../third-party/pthreadpool/include)
list(APPEND Caffe_LINKER_LIBS ${NNPACK_LIB})
endif()

# ---[ OpenCV
if(USE_OPENCV)
find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs)
Expand Down
54 changes: 54 additions & 0 deletions cmake/Modules/FindNNPACK.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# FindNNPACK.cmake -- locate the NNPACK header and library.
#
# Search hints:
#   NNPACK_ROOT (environment variable) - searched directly and in its
#   include/ and lib/ subdirectories, in addition to the fixed system paths
#   listed below.
#
# Result variables:
#   NNPACK_FOUND       - ON when both nnpack.h and the nnpack library were found
#   NNPACK_INCLUDE_DIR - directory containing nnpack.h
#   NNPACK_LIB         - path to the nnpack library
#
# Side effect: when NNPACK is found, -DUSE_NNPACK is added to the compile
# definitions of the including directory scope.

SET(NNPACK_INCLUDE_SEARCH_PATHS
  /usr/include
  /usr/local/include
  /opt/NNPACK/include
  $ENV{NNPACK_ROOT}
  $ENV{NNPACK_ROOT}/include
)

SET(NNPACK_LIB_SEARCH_PATHS
  /lib/
  /lib64/
  /usr/lib
  /usr/lib64
  /usr/local/lib
  /usr/local/lib64
  /opt/NNPACK/lib
  $ENV{NNPACK_ROOT}
  $ENV{NNPACK_ROOT}/lib
)

FIND_PATH(NNPACK_INCLUDE_DIR NAMES nnpack.h PATHS ${NNPACK_INCLUDE_SEARCH_PATHS})
FIND_LIBRARY(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_LIB_SEARCH_PATHS})

# Assume success, then clear the flag for each missing component.
SET(NNPACK_FOUND ON)

# Check include files
IF(NOT NNPACK_INCLUDE_DIR)
  SET(NNPACK_FOUND OFF)
  MESSAGE(STATUS "Could not find NNPACK include. Turning NNPACK_FOUND off")
ENDIF()

# Check libraries
IF(NOT NNPACK_LIB)
  SET(NNPACK_FOUND OFF)
  MESSAGE(STATUS "Could not find NNPACK lib. Turning NNPACK_FOUND off")
ENDIF()

IF (NNPACK_FOUND)
  add_definitions(-DUSE_NNPACK)
  IF (NOT NNPACK_FIND_QUIETLY)
    MESSAGE(STATUS "Found NNPACK libraries: ${NNPACK_LIB}")
    MESSAGE(STATUS "Found NNPACK include: ${NNPACK_INCLUDE_DIR}")
  ENDIF (NOT NNPACK_FIND_QUIETLY)
ELSE (NNPACK_FOUND)
  # Only a REQUIRED find_package() call turns a miss into a hard error.
  IF (NNPACK_FIND_REQUIRED)
    MESSAGE(FATAL_ERROR "Could not find NNPACK")
  ENDIF (NNPACK_FIND_REQUIRED)
ENDIF (NNPACK_FOUND)

# Only the cache entries created by FIND_PATH/FIND_LIBRARY above are marked;
# this module defines no cache variable named NNPACK.
MARK_AS_ADVANCED(
  NNPACK_INCLUDE_DIR
  NNPACK_LIB
)
3 changes: 3 additions & 0 deletions cmake/Summary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ function(caffe_print_configuration_summary)
if(USE_OPENCV)
caffe_status(" OpenCV : Yes (ver. ${OpenCV_VERSION})")
endif()
if(USE_NNPACK)
caffe_status(" NNPACK : Yes")
endif()
caffe_status(" CUDA : " HAVE_CUDA THEN "Yes (ver. ${CUDA_VERSION})" ELSE "No" )
caffe_status("")
if(HAVE_CUDA)
Expand Down
15 changes: 15 additions & 0 deletions include/caffe/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

#include "caffe/util/device_alternate.hpp"

#ifdef USE_NNPACK
#include "caffe/nnpack_pool.h"
#endif

// Convert macro to string
#define STRINGIFY(m) #m
#define AS_STRING(m) STRINGIFY(m)
Expand Down Expand Up @@ -138,6 +142,13 @@ class Caffe {
}
#endif

#ifdef USE_NNPACK
// Whether NNPACK can be used for the given Dtype. Only declared here; the
// per-type specializations are defined in the implementation file.
template<typename Dtype> static bool nnpack_supported();
// Returns the pthreadpool handle held by the NNPACKPool member of the Caffe
// object obtained via Get().
// NOTE(review): the handle may be NULL when the pool object did not create a
// pool -- confirm callers accept that.
inline static pthreadpool_t nnpack_threadpool() {
return Get().nnpack_threadpool_.pool();
}
#endif

// Returns the mode: running on CPU or GPU.
inline static Brew mode() { return Get().mode_; }
// The setters for the variables
Expand Down Expand Up @@ -175,6 +186,10 @@ class Caffe {
int solver_count_;
bool root_solver_;

#ifdef USE_NNPACK
NNPACKPool nnpack_threadpool_;
#endif

private:
// The private constructor to avoid duplicate instantiation.
Caffe();
Expand Down
37 changes: 37 additions & 0 deletions include/caffe/nnpack_pool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#pragma once

#include <boost/noncopyable.hpp>

#include "nnpack.h"

namespace caffe {
// Owns the pthreadpool handle that Caffe passes to NNPACK.
//
// The pool is created in the constructor and destroyed in the destructor.
// A pool is only created when more than one worker thread would be used;
// otherwise pool() returns NULL.
// NOTE(review): presumably NNPACK treats a NULL pool as "run on the calling
// thread" -- confirm against the pthreadpool/NNPACK documentation.
class NNPACKPool : public boost::noncopyable {
 public:
  NNPACKPool() : pool_(NULL) {
#ifdef USE_MKL
    // NOTE(review): mkl_get_max_threads() is not declared by any header
    // included in this file; presumably an MKL header has to be included
    // before this one when USE_MKL is defined -- confirm.
    const size_t num_threads = mkl_get_max_threads();
#else
    // Can we do better here?
    const size_t num_threads = 1;
#endif
    if (num_threads > 1) {
      pool_ = pthreadpool_create(num_threads);
    }
  }

  ~NNPACKPool() {
    if (pool_) {
      pthreadpool_destroy(pool_);
      pool_ = NULL;
    }
  }

  // Returns the pool handle, or NULL if no pool was created.
  pthreadpool_t pool() const { return pool_; }

 private:
  pthreadpool_t pool_;
};

}
13 changes: 13 additions & 0 deletions src/caffe/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,19 @@ void GlobalInit(int* pargc, char*** pargv) {
::google::InstallFailureSignalHandler();
}

#ifdef USE_NNPACK
// NNPACK availability for double: always reported as unsupported.
template<>
bool Caffe::nnpack_supported<double>() {
  return false;
}

// NNPACK availability for float: nnp_initialize() runs once, on the first
// call, and the outcome is cached in a function-local static. Every call
// reports whether that initialization returned nnp_status_success.
template<>
bool Caffe::nnpack_supported<float>() {
  static const bool initialized = (nnp_initialize() == nnp_status_success);
  return initialized;
}
#endif

#ifdef CPU_ONLY // CPU-only Caffe.

Caffe::Caffe()
Expand Down
57 changes: 57 additions & 0 deletions src/caffe/layer_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include "caffe/layers/tanh_layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/inner_product_layer.hpp"

#ifdef USE_CUDNN
#include "caffe/layers/cudnn_conv_layer.hpp"
#include "caffe/layers/cudnn_lcn_layer.hpp"
Expand All @@ -27,6 +29,12 @@
#include "caffe/layers/cudnn_tanh_layer.hpp"
#endif

#ifdef USE_NNPACK
#include "caffe/layers/nnpack_convolution_layer.hpp"
#include "caffe/layers/nnpack_pooling_layer.hpp"
#include "caffe/layers/nnpack_inner_product_layer.hpp"
#endif

#ifdef WITH_PYTHON_LAYER
#include "caffe/layers/python_layer.hpp"
#endif
Expand Down Expand Up @@ -57,6 +65,17 @@ shared_ptr<Layer<Dtype> > GetConvolutionLayer(
}
if (engine == ConvolutionParameter_Engine_CAFFE) {
return shared_ptr<Layer<Dtype> >(new ConvolutionLayer<Dtype>(param));
#ifdef USE_NNPACK
} else if (engine == ConvolutionParameter_Engine_NNPACK) {
// If we're in CPU mode and on a supported processor, we can use NNPACK.
// Otherwise, we can't fall through (since we'd get an unknown layer),
// so just return the default ConvolutionLayer.
if ((Caffe::mode() == Caffe::CPU) && Caffe::nnpack_supported<Dtype>()) {
return shared_ptr<Layer<Dtype> >(
new NNPackConvolutionLayer<Dtype>(param));
}
return shared_ptr<Layer<Dtype> >(new ConvolutionLayer<Dtype>(param));
#endif
#ifdef USE_CUDNN
} else if (engine == ConvolutionParameter_Engine_CUDNN) {
if (use_dilation) {
Expand Down Expand Up @@ -84,6 +103,16 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
}
if (engine == PoolingParameter_Engine_CAFFE) {
return shared_ptr<Layer<Dtype> >(new PoolingLayer<Dtype>(param));
#ifdef USE_NNPACK
} else if (engine == PoolingParameter_Engine_NNPACK) {
// If we're in CPU mode and on a supported processor, we can use NNPACK.
// Otherwise, we can't fall through (since we'd get an unknown layer),
// so just return the default PoolingLayer.
if ((Caffe::mode() == Caffe::CPU) && Caffe::nnpack_supported<Dtype>()) {
return shared_ptr<Layer<Dtype> >(new NNPackPoolingLayer<Dtype>(param));
}
return shared_ptr<Layer<Dtype> >(new PoolingLayer<Dtype>(param));
#endif
#ifdef USE_CUDNN
} else if (engine == PoolingParameter_Engine_CUDNN) {
if (param.top_size() > 1) {
Expand All @@ -109,6 +138,34 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {

REGISTER_LAYER_CREATOR(Pooling, GetPoolingLayer);

// Get inner product layer according to engine.
//
// DEFAULT and CAFFE both select the plain InnerProductLayer. When built with
// USE_NNPACK, the NNPACK engine selects NNPackInnerProductLayer if Caffe is
// in CPU mode and reports NNPACK as supported for Dtype, and otherwise falls
// back to InnerProductLayer. Any other engine value is a fatal error.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetInnerProductLayer(const LayerParameter& param) {
  const InnerProductParameter_Engine requested =
      param.inner_product_param().engine();

  // DEFAULT is treated as an alias for the CAFFE engine.
  const bool wants_caffe =
      requested == InnerProductParameter_Engine_DEFAULT ||
      requested == InnerProductParameter_Engine_CAFFE;
  if (wants_caffe) {
    return shared_ptr<Layer<Dtype> >(new InnerProductLayer<Dtype>(param));
  }

#ifdef USE_NNPACK
  if (requested == InnerProductParameter_Engine_NNPACK) {
    // Returning here (rather than falling through) keeps an unusable NNPACK
    // request from reaching the "unknown engine" error below.
    const bool use_nnpack =
        (Caffe::mode() == Caffe::CPU) && Caffe::nnpack_supported<Dtype>();
    if (use_nnpack) {
      return shared_ptr<Layer<Dtype> >(
          new NNPackInnerProductLayer<Dtype>(param));
    }
    return shared_ptr<Layer<Dtype> >(new InnerProductLayer<Dtype>(param));
  }
#endif

  LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
  return shared_ptr<Layer<Dtype> >();
}

REGISTER_LAYER_CREATOR(InnerProduct, GetInnerProductLayer);

// Get LRN layer according to engine
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetLRNLayer(const LayerParameter& param) {
Expand Down
1 change: 0 additions & 1 deletion src/caffe/layers/inner_product_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,5 @@ STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

} // namespace caffe
Loading

0 comments on commit 05151a4

Please sign in to comment.