Merge pull request #71 from ROCmSoftwarePlatform/refactor-algorithmconfig-profileresult

whchung · web-flow · commit 008b9dbf45c2 · 2018-07-19T14:04:18.000-05:00
Refactor AlgorithmConfig / ProfileResult and put scratch size in AlgorithmDesc
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -987,7 +987,6 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
                              !profile_result.algorithm().is_default(),
                 errors::NotFound("Failed to find backward filter algorithm!"));
     algorithm_config.set_algorithm(profile_result.algorithm());
-    algorithm_config.set_algorithm_scratch_size(profile_result.scratch_size());
     algorithm_config.set_algorithm_no_scratch(profile_result.algorithm());
 #endif
     AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters,
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -1037,7 +1037,6 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
 
 
     algorithm_config.set_algorithm(profile_result.algorithm());
-    algorithm_config.set_algorithm_scratch_size(profile_result.scratch_size());
     // TODO - Add support for no-scratch algorithm
     algorithm_config.set_algorithm_no_scratch(AlgorithmDesc());
 #endif
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
@@ -792,7 +792,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
                 errors::NotFound("Failed to find conv algorithm!"));
 
     algorithm_config.set_algorithm(profile_result.algorithm());
-    algorithm_config.set_algorithm_scratch_size(profile_result.scratch_size());
     // TODO - Add support for no-scratch algorithm
     algorithm_config.set_algorithm_no_scratch(AlgorithmDesc());
 #endif
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
@@ -713,22 +713,28 @@ class PoolingDescriptor {
 class AlgorithmDesc {
  public:
   typedef int64 Index;
-  AlgorithmDesc() : algo_(kDefaultAlgorithm), tensor_ops_enabled_(true) {}
+  AlgorithmDesc() : algo_(kDefaultAlgorithm), tensor_ops_enabled_(true), scratch_size_(0) {}
   AlgorithmDesc(Index a, bool use_tensor_ops)
-      : algo_(a), tensor_ops_enabled_(use_tensor_ops) {}
+      : algo_(a), tensor_ops_enabled_(use_tensor_ops), scratch_size_(0) {}
+  AlgorithmDesc(Index a, bool use_tensor_ops, size_t scratch_size)
+      : algo_(a), tensor_ops_enabled_(use_tensor_ops), scratch_size_(scratch_size) {}
   bool is_default() const { return algo_ == kDefaultAlgorithm; }
   bool tensor_ops_enabled() const { return tensor_ops_enabled_; }
   Index algo_id() const { return algo_; }
+  size_t scratch_size() const { return scratch_size_; }
+  void set_scratch_size(size_t val) { scratch_size_ = val; }
   bool operator==(const AlgorithmDesc& other) const {
     return this->algo_ == other.algo_ &&
-           this->tensor_ops_enabled_ == other.tensor_ops_enabled_;
+           this->tensor_ops_enabled_ == other.tensor_ops_enabled_ &&
+           this->scratch_size_ == other.scratch_size_;
   }
   uint64 hash() const;
 
  private:
   enum { kDefaultAlgorithm = -1 };
   Index algo_;
   bool tensor_ops_enabled_;
+  size_t scratch_size_;
 };
 
 // Describes the result from a perf experiment.
@@ -743,15 +749,12 @@ class ProfileResult {
             elapsed_time_in_ms_ != std::numeric_limits<float>::max());
   }
   AlgorithmDesc algorithm() const { return algorithm_; }
-  size_t scratch_size() const { return scratch_size_; }
   void set_algorithm(AlgorithmDesc val) { algorithm_ = val; }
-  void set_scratch_size(size_t val) { scratch_size_ = val; }
   float elapsed_time_in_ms() const { return elapsed_time_in_ms_; }
   void set_elapsed_time_in_ms(float val) { elapsed_time_in_ms_ = val; }
 
  private:
   AlgorithmDesc algorithm_;
-  size_t scratch_size_ = 0;
   float elapsed_time_in_ms_ = std::numeric_limits<float>::max();
 };
 
@@ -764,18 +767,9 @@ class ProfileResult {
 class AlgorithmConfig {
  public:
   AlgorithmConfig()
-      : algorithm_(),
-        algorithm_no_scratch_(),
-        algorithm_scratch_size_(0) {}
+      : algorithm_(), algorithm_no_scratch_() {}
   explicit AlgorithmConfig(AlgorithmDesc algorithm)
-      : algorithm_(algorithm),
-        algorithm_no_scratch_(),
-        algorithm_scratch_size_(0) {}
-  AlgorithmConfig(AlgorithmDesc algorithm, AlgorithmDesc algorithm_no_scratch,
-                  size_t algorithm_scratch_size = 0)
-      : algorithm_(algorithm),
-        algorithm_no_scratch_(algorithm_no_scratch),
-        algorithm_scratch_size_(0) {}
+      : algorithm_(algorithm), algorithm_no_scratch_() {}
   AlgorithmDesc algorithm() const { return algorithm_; }
   void set_algorithm(AlgorithmDesc val) { algorithm_ = val; }
   AlgorithmDesc algorithm_no_scratch() const { return algorithm_no_scratch_; }
@@ -790,17 +784,10 @@ class AlgorithmConfig {
     return !(*this == other);
   }
   string ToString() const;
-  size_t algorithm_scratch_size() const {
-    return algorithm_scratch_size_;
-  }
-  void set_algorithm_scratch_size(size_t val) {
-    algorithm_scratch_size_ = val;
-  }
 
  private:
   AlgorithmDesc algorithm_;
   AlgorithmDesc algorithm_no_scratch_;
-  size_t        algorithm_scratch_size_;
 };
 
 // Describes a local response normalization (LRN). LRN is used e.g. in
diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc
@@ -1746,8 +1746,9 @@ bool MIOpenSupport::DoConvolveImpl(
 
   } else {
     // An algorithm has been specified.
-    algo_sz.first = ToConvForwardAlgo(algorithm_config.algorithm());
-    algo_sz.second = algorithm_config.algorithm_scratch_size();
+    dnn::AlgorithmDesc algo = algorithm_config.algorithm();
+    algo_sz.first = ToConvForwardAlgo(algo);
+    algo_sz.second = algo.scratch_size();
 
     size_t size_in_bytes = algo_sz.second;
     if (size_in_bytes != 0) {
@@ -1801,9 +1802,8 @@ bool MIOpenSupport::DoConvolveImpl(
       return false;
     }
     if (status == miopenStatusSuccess) {
-      dnn::AlgorithmDesc algotype(algo_sz.first, false);
+      dnn::AlgorithmDesc algotype(algo_sz.first, false, algo_sz.second);
       output_profile_result->set_algorithm(algotype);
-      output_profile_result->set_scratch_size(algo_sz.second);
       output_profile_result->set_elapsed_time_in_ms(
           timer->GetElapsedMilliseconds());
     }
@@ -2307,8 +2307,9 @@ bool MIOpenSupport::DoConvolveBackwardDataImpl(
 
   } else {
     // An algorithm has been specified.
-    algo_sz.first = ToConvBackwardDataAlgo(algorithm_config.algorithm());
-    algo_sz.second = algorithm_config.algorithm_scratch_size();
+    dnn::AlgorithmDesc algo = algorithm_config.algorithm();
+    algo_sz.first = ToConvBackwardDataAlgo(algo);
+    algo_sz.second = algo.scratch_size();
 
     size_t size_in_bytes = algo_sz.second;
     if (size_in_bytes != 0) {
@@ -2362,9 +2363,8 @@ bool MIOpenSupport::DoConvolveBackwardDataImpl(
   if (is_profiling) {
     timer->Stop(AsROCMStream(stream));
     if (status == miopenStatusSuccess) {
-      dnn::AlgorithmDesc algotype(algo_sz.first, false);
+      dnn::AlgorithmDesc algotype(algo_sz.first, false, algo_sz.second);
       output_profile_result->set_algorithm(algotype);
-      output_profile_result->set_scratch_size(algo_sz.second);
       output_profile_result->set_elapsed_time_in_ms(
           timer->GetElapsedMilliseconds());
     }
@@ -2530,8 +2530,9 @@ bool MIOpenSupport::DoConvolveBackwardFilterImpl(
 
   } else {
     // An algorithm has been specified.
+    dnn::AlgorithmDesc algo = algorithm_config.algorithm();
     algo_sz.first = ToConvBackwardFilterAlgo(algorithm_config.algorithm());
-    algo_sz.second = algorithm_config.algorithm_scratch_size();
+    algo_sz.second = algo.scratch_size();
 
     size_t size_in_bytes = algo_sz.second;
 
@@ -2585,9 +2586,8 @@ bool MIOpenSupport::DoConvolveBackwardFilterImpl(
   if (is_profiling) {
     timer->Stop(AsROCMStream(stream));
     if (status == miopenStatusSuccess) {
-      dnn::AlgorithmDesc algotype(algo_sz.first, false);
+      dnn::AlgorithmDesc algotype(algo_sz.first, false, algo_sz.second);
       output_profile_result->set_algorithm(algotype);
-      output_profile_result->set_scratch_size(algo_sz.second);
       output_profile_result->set_elapsed_time_in_ms(
           timer->GetElapsedMilliseconds());
     }