diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc index 529da36c82cd..8d190597ab0b 100644 --- a/src/c_api/c_api_ndarray.cc +++ b/src/c_api/c_api_ndarray.cc @@ -289,7 +289,6 @@ void PushFCompute(const FCompute& fn, requested}; if (ctx.dev_mask() == gpu::kDevMask) { #if MXNET_USE_CUDA -#if __CUDACC__ GetDefaultBlobs(ndinputs, &input_blobs, &temp_in, opctx); GetDefaultBlobs(ndoutputs, &output_blobs, &temp_out, opctx); std::vector req(output_blobs.size(), kWriteTo); @@ -297,7 +296,6 @@ void PushFCompute(const FCompute& fn, // cast to original storage type, if necessary CastNonDefaultStorage(ndoutputs, temp_out, opctx); rctx.get_stream()->Wait(); -#endif // __CUDACC__ #else LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; #endif diff --git a/src/common/utils.cc b/src/common/utils.cc new file mode 100644 index 000000000000..5bfb959fdf34 --- /dev/null +++ b/src/common/utils.cc @@ -0,0 +1,23 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file utils.cc + * \brief cpu implementation of util functions + */ + +#include "./utils.h" +#include "../operator/nn/cast_storage-inl.h" + +namespace mxnet { +namespace common { + + +template<> +void CastStorageDispatch(mshadow::Stream* s, + const NDArray& input, + const NDArray& output) { + mxnet::op::CastStorageComputeImpl(s, input, output); +} + + +} // namespace common +} // namespace mxnet diff --git a/src/common/utils.cu b/src/common/utils.cu new file mode 100644 index 000000000000..a249be5bb9f5 --- /dev/null +++ b/src/common/utils.cu @@ -0,0 +1,21 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file utils.cu + * \brief gpu implementation of util functions + */ + +#include "./utils.h" +#include "../operator/nn/cast_storage-inl.h" + +namespace mxnet { +namespace common { + +template<> +void CastStorageDispatch(mshadow::Stream* s, + const NDArray& input, + const NDArray& output) { + mxnet::op::CastStorageComputeImpl(s, input, output); +} + +} // namespace common +} // namespace mxnet diff --git a/src/common/utils.h b/src/common/utils.h index 803c3af486c7..e87e42497b59 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -6,7 +6,12 @@ #ifndef MXNET_COMMON_UTILS_H_ #define MXNET_COMMON_UTILS_H_ -#if DMLC_USE_CXX11 +#include +#include +#include +#include +#include + #include #include #include @@ -15,20 +20,14 @@ #include #include #include -#endif // DMLC_USE_CXX11 - -#include -#include -#include -#include -#include -#include "../operator/nn/cast_storage-inl.h" namespace mxnet { namespace common { -#if DMLC_USE_CXX11 +template +void CastStorageDispatch(mshadow::Stream* s, const NDArray& input, const NDArray& output); + /* * \brief Get the corresponding tensor blobs from default storage NDArrays. * If any NDArray is of non-default storage, it is casted to default storage and @@ -54,7 +53,7 @@ inline bool GetDefaultBlobs(const std::vector& nds, << "doesn't support NDArray inputs with non-default storage."; } NDArray temp(nd.shape(), nd.ctx(), false); - mxnet::op::CastStorageComputeImpl(ctx.get_stream(), nd, temp); + CastStorageDispatch(ctx.get_stream(), nd, temp); temps->push_back(temp); blobs->push_back(temp.data()); casted = true; @@ -65,14 +64,6 @@ inline bool GetDefaultBlobs(const std::vector& nds, return casted; } -template -inline void GetOutputBlobs(const std::vector& nds, - std::vector *blobs) { - for (auto& nd : nds) { - blobs->push_back(nd.data()); - } -} - /* * \brief Cast the NDArrays in `src` according to the storage types of the NDArrays * in `dst`. The ones with default storage in `dst` are ignored. @@ -98,7 +89,7 @@ inline void CastNonDefaultStorage(const std::vector& dst, << "You are probably executing an operator which " << "doesn't support NDArray inputs with non-default storage."; } - mxnet::op::CastStorageComputeImpl(ctx.get_stream(), src[src_idx++], dst[i]); + CastStorageDispatch(ctx.get_stream(), src[src_idx++], dst[i]); } } CHECK_EQ(src_idx, src.size()) << "Not all src NDArrays are casted"; @@ -299,8 +290,6 @@ typename helper::UniqueIf::UnknownBound MakeUnique(size_t n) { template typename helper::UniqueIf::KnownBound MakeUnique(Args&&... args) = delete; -#endif // DMLC_USE_CXX11 - } // namespace common } // namespace mxnet #endif // MXNET_COMMON_UTILS_H_ diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 3e037a98b736..981fab2a4686 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -40,13 +40,11 @@ class ForwardOpExecutor : public OpExecutor { temp_in_.clear(); temp_out_.clear(); temp_aux_.clear(); if (is_gpu) { #if MXNET_USE_CUDA -#if __CUDACC__ GetDefaultBlobs(in_array_, &in_data_, &temp_in_, op_ctx); GetDefaultBlobs(aux_array_, &aux_data_, &temp_aux_, op_ctx); GetDefaultBlobs(out_array, &out_data_, &temp_out_, op_ctx); op_->Forward(op_ctx, in_data_, req, out_data_, aux_data_); CastNonDefaultStorage(out_array, temp_out_, op_ctx); -#endif // __CUDACC__ #elif NDEBUG == 0 LOG(DEBUG) << MXNET_GPU_NOT_ENABLED_ERROR; #endif @@ -173,12 +171,10 @@ class FComputeExecutor : public OpExecutor { temp_in_.clear(); temp_out_.clear(); if (is_gpu) { #if MXNET_USE_CUDA -#if __CUDACC__ GetDefaultBlobs(in_array, &in_data_, &temp_in_, op_ctx); GetDefaultBlobs(out_array, &out_data_, &temp_out_, op_ctx); fcompute_(attrs_, op_ctx, in_data_, req, out_data_); CastNonDefaultStorage(out_array, temp_out_, op_ctx); -#endif // __CUDACC__ #else LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; #endif diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 22cb70e275a2..c894f27c25b7 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -12,6 +12,7 @@ #include #include #include "./ndarray_function.h" +#include "../common/utils.h" #include "../operator/tensor/matrix_op-inl.h" #include "../operator/tensor/init_op.h" #include "./autograd.h" @@ -466,7 +467,7 @@ void CopyFromToImpl(const NDArray from, NDArray *to, RunContext ctx) { } else { casted_nd = NDArray(to_stype, shape, from_ctx); } - op::CastStorageComputeImpl(s, from, casted_nd); + common::CastStorageDispatch(s, from, casted_nd); } else { casted_nd = from; } @@ -510,7 +511,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) { FnProperty::kNormal, priority, PROFILER_MESSAGE("CopyCPU2CPU")); } else { #if MXNET_USE_CUDA -#if __CUDACC__ if (a == cpu::kDevMask && b == gpu::kDevMask) { Engine::Get()->PushSync([from, ret](RunContext ctx) { NDArray nd(ret); @@ -533,7 +533,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) { } else { LOG(FATAL) << "unknown device mask"; } -#endif // __CUDACC__ #else LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; #endif diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index 6a9ee30f1b04..d4a473c8be0c 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -211,7 +211,6 @@ __global__ void mxnet_generic_kernel(int N, Args... args) { } } - template struct Kernel { template diff --git a/src/operator/nn/cast_storage-inl.h b/src/operator/nn/cast_storage-inl.h index 921003453e1a..b60665c73fe9 100644 --- a/src/operator/nn/cast_storage-inl.h +++ b/src/operator/nn/cast_storage-inl.h @@ -10,7 +10,7 @@ #include "../mxnet_op.h" #ifdef __CUDACC__ #include "./cast_storage-inl.cuh" -#endif +#endif // __CUDACC__ namespace mxnet {