diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 75b8bc4..9ae9e1a 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -650,7 +650,9 @@ static void DirConvFindCore(Handle& handle, else { MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") - << best); + << best + << ", workspce_sz = " + << sol.workspce_sz); if(elapsed < best) { best = elapsed; @@ -988,7 +990,9 @@ static void DirConvFindCore(Handle& handle, else { MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") - << best); + << best + << ", workspce_sz = " + << sol.workspce_sz); if(elapsed < best) { best = elapsed; @@ -2193,7 +2197,9 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, else { MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") - << best); + << best + << ", workspce_sz = " + << sol.workspce_sz); if(elapsed < best) { best = elapsed; @@ -2325,7 +2331,9 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, else { MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") - << best); + << best + << ", workspce_sz = " + << sol.workspce_sz); if(elapsed < best) { best = elapsed; @@ -3645,7 +3653,9 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, workSpaceSize, as_float(0.0f)); MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") - << best); + << best + << ", workspce_sz = " + << sol.workspce_sz); if(elapsed < best) { best = elapsed; @@ -3854,7 +3864,9 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, workSpaceSize, as_float(0.0f)); MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") - << best); + << best + << ", workspce_sz = " + << sol.workspce_sz); if(elapsed < best) { best = elapsed; @@ -4104,10 +4116,13 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(Handle& handle, else { assert(kernels.size() == 2); - // this pointer needed here as a workaround in gcc 5 - assert(workSpace != nullptr && - workSpaceSize >= this->BackwardWeightsGetWorkSpaceSizeDirect( - handle, dyDesc, xDesc, dwDesc)); + /// We can't use BackwardWeightsGetWorkSpaceSizeDirect() to check if enough + /// workspace is provided by the user, because the function returns max of + /// all available Solutions, but we do not know how much workspace is + /// required for the specific Solution (which is reduced to a vector of + /// kernels here) we are going to invoke. So let's check against 0 for now. + /// \todo Implement full ws size check. See #1127. + assert(workSpace != nullptr && workSpaceSize > 0); if(kernel.GetName() == "SubSample") { // subsampling kernel diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp index 3884f8b..01251dc 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp @@ -257,9 +257,14 @@ ConvSolution ConvOclBwdWrW2::GetSolution(const ConvolutionContext& params) const { int out_width = params.in_width; // out is in, in is out - int wei_blk_sz0 = ((params.kernel_size0 + WEI_WKITEM - 1) / WEI_WKITEM); - int wei_blk_sz = params.kernel_size1 * wei_blk_sz0; - int n_wei_blk = GRP_SZ / wei_blk_sz; + int wei_blk_sz0 = ((params.kernel_size0 + WEI_WKITEM - 1) / WEI_WKITEM); + int wei_blk_sz = params.kernel_size1 * wei_blk_sz0; + int n_wei_blk = GRP_SZ / wei_blk_sz; + if(n_wei_blk == 0) + { /// \todo This is quickfix for DIV/0, see ROCmSoftwarePlatform/MIOpen/issues/70. + MIOPEN_LOG_I2("ConvOClBwdWrW2: GRP_SZ < wei_blk_sz, not applicable?"); + return ConvSolution(miopenStatusNotInitialized); + } out_wei_scan_loop = (out_width + n_wei_blk - 1) / n_wei_blk; max_wei_blk = std::min(n_wei_blk, (out_width + out_wei_scan_loop - 1) / out_wei_scan_loop);