Fix refactor sparse gpu test (apache#104)
* Fix gpu build

* Fix

* Fix gpu test failure
reminisce authored and eric-haibin-lin committed Jun 21, 2017
1 parent ddbe565 commit 905304c
Showing 8 changed files with 58 additions and 33 deletions.
2 changes: 0 additions & 2 deletions src/c_api/c_api_ndarray.cc
@@ -289,15 +289,13 @@ void PushFCompute(const FCompute& fn,
requested};
if (ctx.dev_mask() == gpu::kDevMask) {
#if MXNET_USE_CUDA
-#if __CUDACC__
GetDefaultBlobs<gpu>(ndinputs, &input_blobs, &temp_in, opctx);
GetDefaultBlobs<gpu>(ndoutputs, &output_blobs, &temp_out, opctx);
std::vector<OpReqType> req(output_blobs.size(), kWriteTo);
fn(attrs, opctx, input_blobs, req, output_blobs);
// cast to original storage type, if necessary
CastNonDefaultStorage<gpu>(ndoutputs, temp_out, opctx);
rctx.get_stream<gpu>()->Wait();
-#endif // __CUDACC__
#else
LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
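Note: __CUDACC__ is defined only when nvcc compiles the translation unit, and
c_api_ndarray.cc is built by the host C++ compiler, so the guard removed above
always evaluated to false and the GPU fallback block was silently compiled out
even in CUDA builds, which is presumably the gpu test failure this commit fixes.
A minimal standalone illustration (not MXNet code):

// Compile with g++ and the else branch runs; compile with nvcc and the
// guarded branch runs. Inside a host-compiled .cc file the guarded code
// can never execute.
#include <iostream>

int main() {
#if defined(__CUDACC__)
  std::cout << "built by nvcc: GPU fallback path present\n";
#else
  std::cout << "built by the host compiler: GPU fallback path compiled out\n";
#endif
  return 0;
}
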
23 changes: 23 additions & 0 deletions src/common/utils.cc
@@ -0,0 +1,23 @@
/*!
* Copyright (c) 2017 by Contributors
* \file utils.cc
* \brief cpu implementation of util functions
*/

#include "./utils.h"
#include "../operator/nn/cast_storage-inl.h"

namespace mxnet {
namespace common {


template<>
void CastStorageDispatch<cpu>(mshadow::Stream<cpu>* s,
const NDArray& input,
const NDArray& output) {
mxnet::op::CastStorageComputeImpl(s, input, output);
}


} // namespace common
} // namespace mxnet
21 changes: 21 additions & 0 deletions src/common/utils.cu
@@ -0,0 +1,21 @@
/*!
* Copyright (c) 2017 by Contributors
* \file utils.cu
* \brief gpu implementation of util functions
*/

#include "./utils.h"
#include "../operator/nn/cast_storage-inl.h"

namespace mxnet {
namespace common {

template<>
void CastStorageDispatch<gpu>(mshadow::Stream<gpu>* s,
const NDArray& input,
const NDArray& output) {
mxnet::op::CastStorageComputeImpl(s, input, output);
}

} // namespace common
} // namespace mxnet
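The new utils.cc / utils.cu pair provides the per-device definitions of the
CastStorageDispatch<xpu> template declared in utils.h: the cpu specialization
is built by the host compiler and the gpu specialization by nvcc, so callers
only need the declaration and no longer need __CUDACC__ guards. A standalone
sketch of the same pattern, with hypothetical names and collapsed into one
file for brevity (in the real code the gpu body lives in a .cu file):

#include <iostream>

struct cpu {};
struct gpu {};

// header equivalent: declaration only, no device-specific code needed here
template <typename Device>
void Dispatch(const char* what);

// .cc equivalent: CPU specialization, compiled by the host compiler
template <>
void Dispatch<cpu>(const char* what) {
  std::cout << "cpu cast: " << what << "\n";
}

// .cu equivalent: GPU specialization, compiled by nvcc in the real code
template <>
void Dispatch<gpu>(const char* what) {
  std::cout << "gpu cast: " << what << "\n";
}

int main() {
  Dispatch<cpu>("csr -> dense");
  Dispatch<gpu>("row_sparse -> dense");
  return 0;
}
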
33 changes: 11 additions & 22 deletions src/common/utils.h
@@ -6,7 +6,12 @@
#ifndef MXNET_COMMON_UTILS_H_
#define MXNET_COMMON_UTILS_H_

-#if DMLC_USE_CXX11
+#include <dmlc/logging.h>
+#include <mxnet/engine.h>
+#include <mxnet/ndarray.h>
+#include <mxnet/op_attr_types.h>
+#include <nnvm/graph_attr_types.h>

#include <memory>
#include <vector>
#include <type_traits>
@@ -15,20 +20,14 @@
#include <thread>
#include <algorithm>
#include <functional>
-#endif // DMLC_USE_CXX11

-#include <dmlc/logging.h>
-#include <mxnet/engine.h>
-#include <mxnet/ndarray.h>
-#include <mxnet/op_attr_types.h>
-#include <nnvm/graph_attr_types.h>
-#include "../operator/nn/cast_storage-inl.h"

namespace mxnet {

namespace common {

-#if DMLC_USE_CXX11
+template<typename xpu>
+void CastStorageDispatch(mshadow::Stream<xpu>* s, const NDArray& input, const NDArray& output);

/*
* \brief Get the corresponding tensor blobs from default storage NDArrays.
* If any NDArray is of non-default storage, it is casted to default storage and
@@ -54,7 +53,7 @@ inline bool GetDefaultBlobs(const std::vector<NDArray>& nds,
<< "doesn't support NDArray inputs with non-default storage.";
}
NDArray temp(nd.shape(), nd.ctx(), false);
-mxnet::op::CastStorageComputeImpl<xpu>(ctx.get_stream<xpu>(), nd, temp);
+CastStorageDispatch<xpu>(ctx.get_stream<xpu>(), nd, temp);
temps->push_back(temp);
blobs->push_back(temp.data());
casted = true;
@@ -65,14 +64,6 @@
return casted;
}

-template <typename xpu>
-inline void GetOutputBlobs(const std::vector<NDArray>& nds,
-                           std::vector<TBlob> *blobs) {
-  for (auto& nd : nds) {
-    blobs->push_back(nd.data());
-  }
-}

/*
* \brief Cast the NDArrays in `src` according to the storage types of the NDArrays
* in `dst`. The ones with default storage in `dst` are ignored.
@@ -98,7 +89,7 @@ inline void CastNonDefaultStorage(const std::vector<NDArray>& dst,
<< "You are probably executing an operator which "
<< "doesn't support NDArray inputs with non-default storage.";
}
-mxnet::op::CastStorageComputeImpl(ctx.get_stream<xpu>(), src[src_idx++], dst[i]);
+CastStorageDispatch<xpu>(ctx.get_stream<xpu>(), src[src_idx++], dst[i]);
}
}
CHECK_EQ(src_idx, src.size()) << "Not all src NDArrays are casted";
@@ -299,8 +290,6 @@ typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;

-#endif  // DMLC_USE_CXX11

} // namespace common
} // namespace mxnet
#endif // MXNET_COMMON_UTILS_H_
4 changes: 0 additions & 4 deletions src/executor/attach_op_execs_pass.cc
@@ -40,13 +40,11 @@ class ForwardOpExecutor : public OpExecutor {
temp_in_.clear(); temp_out_.clear(); temp_aux_.clear();
if (is_gpu) {
#if MXNET_USE_CUDA
-#if __CUDACC__
GetDefaultBlobs<gpu>(in_array_, &in_data_, &temp_in_, op_ctx);
GetDefaultBlobs<gpu>(aux_array_, &aux_data_, &temp_aux_, op_ctx);
GetDefaultBlobs<gpu>(out_array, &out_data_, &temp_out_, op_ctx);
op_->Forward(op_ctx, in_data_, req, out_data_, aux_data_);
CastNonDefaultStorage<gpu>(out_array, temp_out_, op_ctx);
-#endif // __CUDACC__
#elif NDEBUG == 0
LOG(DEBUG) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
@@ -173,12 +171,10 @@ class FComputeExecutor : public OpExecutor {
temp_in_.clear(); temp_out_.clear();
if (is_gpu) {
#if MXNET_USE_CUDA
-#if __CUDACC__
GetDefaultBlobs<gpu>(in_array, &in_data_, &temp_in_, op_ctx);
GetDefaultBlobs<gpu>(out_array, &out_data_, &temp_out_, op_ctx);
fcompute_(attrs_, op_ctx, in_data_, req, out_data_);
CastNonDefaultStorage<gpu>(out_array, temp_out_, op_ctx);
-#endif // __CUDACC__
#else
LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
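Both executors above, like PushFCompute earlier, share one fallback flow on the
GPU path: densify any non-default-storage arrays with GetDefaultBlobs<gpu>, run
the dense compute function, then cast the results back with
CastNonDefaultStorage<gpu>. A standalone toy of that pattern with hypothetical
types (not MXNet code):

#include <iostream>
#include <vector>

enum StorageType { kDense, kSparse };

struct Array {
  StorageType stype;
  std::vector<float> data;  // toy payload
};

// stands in for CastStorageDispatch<xpu>: convert between storage types
Array CastStorage(const Array& src, StorageType dst_stype) {
  return Array{dst_stype, src.data};
}

// stands in for a dense-only FCompute kernel
void DenseAddOne(Array* a) {
  for (float& v : a->data) v += 1.0f;
}

void RunWithFallback(Array* out) {
  const bool was_sparse = (out->stype == kSparse);
  Array tmp = was_sparse ? CastStorage(*out, kDense) : *out;  // densify
  DenseAddOne(&tmp);                                          // compute
  *out = was_sparse ? CastStorage(tmp, kSparse) : tmp;        // cast back
}

int main() {
  Array a{kSparse, {1.f, 2.f}};
  RunWithFallback(&a);
  std::cout << a.data[0] << " " << a.data[1] << "\n";  // prints "2 3"
  return 0;
}
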
5 changes: 2 additions & 3 deletions src/ndarray/ndarray.cc
@@ -12,6 +12,7 @@
#include <mxnet/resource.h>
#include <mshadow/tensor.h>
#include "./ndarray_function.h"
#include "../common/utils.h"
#include "../operator/tensor/matrix_op-inl.h"
#include "../operator/tensor/init_op.h"
#include "./autograd.h"
@@ -466,7 +467,7 @@ void CopyFromToImpl(const NDArray from, NDArray *to, RunContext ctx) {
} else {
casted_nd = NDArray(to_stype, shape, from_ctx);
}
-op::CastStorageComputeImpl<from_xpu>(s, from, casted_nd);
+common::CastStorageDispatch<from_xpu>(s, from, casted_nd);
} else {
casted_nd = from;
}
@@ -510,7 +511,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) {
FnProperty::kNormal, priority, PROFILER_MESSAGE("CopyCPU2CPU"));
} else {
#if MXNET_USE_CUDA
-#if __CUDACC__
if (a == cpu::kDevMask && b == gpu::kDevMask) {
Engine::Get()->PushSync([from, ret](RunContext ctx) {
NDArray nd(ret);
@@ -533,7 +533,6 @@
} else {
LOG(FATAL) << "unknown device mask";
}
-#endif // __CUDACC__
#else
LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
1 change: 0 additions & 1 deletion src/operator/mxnet_op.h
@@ -211,7 +211,6 @@ __global__ void mxnet_generic_kernel(int N, Args... args) {
}
}


template<typename OP>
struct Kernel<OP, gpu> {
template<typename ...Args>
2 changes: 1 addition & 1 deletion src/operator/nn/cast_storage-inl.h
@@ -10,7 +10,7 @@
#include "../mxnet_op.h"
#ifdef __CUDACC__
#include "./cast_storage-inl.cuh"
-#endif
+#endif // __CUDACC__


namespace mxnet {
