From 7b787d3cf37a994e51a3420149c6bbd48ca09c8c Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 30 Oct 2018 13:21:35 -0700 Subject: [PATCH] [MXNET-1179] Enforce deterministic algorithms in convolution layers (#12992) * Add the MXNET_ENFORCE_DETERMINISM environment variable to select deterministic cuDNN algorithms * Set its default value to false * Fix build failure in Windows GPU * Revert the previous change * Only check determinism when CUDNN_MAJOR >= 7 * Add cuDNN version check * Fix lint error --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 4 ++++ src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 4 ++++ src/operator/nn/cudnn/cudnn_pooling-inl.h | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index d63d46821edc..3bd6c5a3826b 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -889,6 +889,7 @@ class CuDNNConvolutionOp { size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), // regardless of mathType. 
+ bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -896,6 +897,9 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index c0c56507bbf3..72ba2c95fc6a 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -829,6 +829,7 @@ class CuDNNDeconvolutionOp { void AlgoFinalSelect(const std::vector &perf_results, std::string kernel_name, size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo regardless of mathType. 
+ bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -836,6 +837,9 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index bc3ee366007c..89fa73ef5471 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,7 +48,12 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: + #if CUDNN_MAJOR >= 7 + mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? + CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; + #else mode_ = CUDNN_POOLING_MAX; + #endif break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) {