Merge branch 'DeepLink-org:main' into improve_buildATen

DeepLink-org · Jun 3, 2024 · 2224093 · 2224093
2 parents 8afcf59 + e1392fe
commit 2224093
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 50 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -197,6 +197,7 @@ jobs:
     name: Gen-Data-Op-Test-A100
     runs-on: tps-diopi-ci
     needs: [Build-Nvidia-A100]
+    if: ${{ contains( needs.Rsync.outputs.output, 'NV' ) }}
     steps:
       - name: gen-test-data
         run: |

diff --git a/impl/camb/functions/index.cpp b/impl/camb/functions/index.cpp
@@ -201,8 +201,6 @@ static diopiError_t indexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out,
                                  true,
                                  outputDesc.get(),
                                  outputTensor.data()));
-    // TODO: add sync here temporarily because indicesPtrList is a host tensor, and may be released earlier.
-    syncStreamInCtx(ctx);
     return diopiSuccess;
 }
 

diff --git a/impl/camb/functions/index_put.cpp b/impl/camb/functions/index_put.cpp
@@ -109,8 +109,6 @@ diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, di
                                  true,
                                  outputDesc.get(),
                                  outputTensor.data()));
-    // TODO: add sync here temporarily because indicesPtrList is a host tensor, and may be released earlier.
-    syncStreamInCtx(ctx);
 
     return diopiSuccess;
 }

diff --git a/impl/camb/functions_mmcv/modulated_deform_conv_mlu.cpp b/impl/camb/functions_mmcv/modulated_deform_conv_mlu.cpp
@@ -36,7 +36,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
                     outputTensor.size(3) == maskTensor.size(3),
                 "offset and mask should have the same spatial size as the output of the convolution");
 
-    std::vector<impl::camb::DiopiTensor*> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor};
+    std::vector<impl::camb::DiopiTensor *> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor};
     DIOPI_CALL(impl::camb::autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32}));
 
     impl::camb::DiopiTensor outputTensorTmp = outputTensor;
@@ -61,31 +61,19 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
     impl::camb::CnnlTensorDesc biasDesc(biasTensor, CNNL_LAYOUT_ARRAY);
 
     int32_t batchSize = static_cast<int32_t>(inputTensor.size(0));
-
     // im2col_step should be set in cnnl kernel.
     int32_t im2colStep = batchSize;
-    impl::camb::DiopiTensor strideTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
-    impl::camb::DiopiTensor paddingTensor = impl::camb::requiresTensor(ctx, {4}, diopi_dtype_int32, diopiDevice_t::diopi_host);
-    impl::camb::DiopiTensor dilationTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
-    int32_t* strideTensorPtr = (int32_t*)strideTensor.data();
-    int32_t* paddingTensorPtr = (int32_t*)paddingTensor.data();
-    int32_t* dilationTensorPtr = (int32_t*)dilationTensor.data();
-    strideTensorPtr[0] = static_cast<int32_t>(strideH);
-    strideTensorPtr[1] = static_cast<int32_t>(strideW);
-    paddingTensorPtr[0] = static_cast<int32_t>(padH);
-    paddingTensorPtr[1] = static_cast<int32_t>(padH);
-    paddingTensorPtr[2] = static_cast<int32_t>(padW);
-    paddingTensorPtr[3] = static_cast<int32_t>(padW);
-    dilationTensorPtr[0] = static_cast<int32_t>(dilationH);
-    dilationTensorPtr[1] = static_cast<int32_t>(dilationW);
+    int32_t stride[2] = {static_cast<int32_t>(strideH), static_cast<int32_t>(strideW)};
+    int32_t padding[4] = {static_cast<int32_t>(padH), static_cast<int32_t>(padH), static_cast<int32_t>(padW), static_cast<int32_t>(padW)};
+    int32_t dilation[2] = {static_cast<int32_t>(dilationH), static_cast<int32_t>(dilationW)};
 
     cnnlDCNDescriptor_t dcnDesc;
     DIOPI_CALL_CNNL(cnnlCreateDCNDescriptor(&dcnDesc));
     DIOPI_CALL_CNNL(cnnlSetDCNDescriptor(dcnDesc,
                                          inputTensor.dim(),
-                                         paddingTensorPtr,
-                                         strideTensorPtr,
-                                         dilationTensorPtr,
+                                         padding,
+                                         stride,
+                                         dilation,
                                          static_cast<int32_t>(deformableGroup),
                                          static_cast<int32_t>(group),
                                          im2colStep,
@@ -101,7 +89,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
                                                    withBias ? biasDesc.get() : nullptr,
                                                    outputDesc.get(),
                                                    &workspaceSize));
-    void* workspace = nullptr;
+    void *workspace = nullptr;
     if (workspaceSize != 0) {
         workspace = impl::camb::requiresBuffer(ctx, workspaceSize).data();
     }
@@ -130,9 +118,6 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
         DIOPI_CALL(impl::camb::dataTypeCast(ctx, outputTensor, outputTensorTmp));
     }
 
-    // TODO: add sync here temporarily because the tensors(padding,stride,dilation) are host tensors, and may be released earlier.
-    impl::camb::syncStreamInCtx(ctx);
-
     return diopiSuccess;
 }
 
@@ -168,7 +153,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
                     gradOutputTensor.size(3) == offsetTensor.size(3) && gradOutputTensor.size(3) == maskTensor.size(3),
                 "offset and mask should have the same spatial size as the output of the convolution");
 
-    std::vector<impl::camb::DiopiTensor*> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor, &gradOutputTensor};
+    std::vector<impl::camb::DiopiTensor *> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor, &gradOutputTensor};
     DIOPI_CALL(impl::camb::autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32}));
 
     impl::camb::DiopiTensor gradInputTensorTmp = gradInputTensor;
@@ -222,28 +207,17 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
     int32_t batchSize = static_cast<int32_t>(inputTensor.size(0));
     // im2col_step should be set in cnnl kernel.
     int32_t im2colStep = batchSize;
-    impl::camb::DiopiTensor strideTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
-    impl::camb::DiopiTensor paddingTensor = impl::camb::requiresTensor(ctx, {4}, diopi_dtype_int32, diopiDevice_t::diopi_host);
-    impl::camb::DiopiTensor dilationTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
-    int32_t* strideTensorPtr = (int32_t*)strideTensor.data();
-    int32_t* paddingTensorPtr = (int32_t*)paddingTensor.data();
-    int32_t* dilationTensorPtr = (int32_t*)dilationTensor.data();
-    strideTensorPtr[0] = static_cast<int32_t>(strideH);
-    strideTensorPtr[1] = static_cast<int32_t>(strideW);
-    paddingTensorPtr[0] = static_cast<int32_t>(padH);
-    paddingTensorPtr[1] = static_cast<int32_t>(padH);
-    paddingTensorPtr[2] = static_cast<int32_t>(padW);
-    paddingTensorPtr[3] = static_cast<int32_t>(padW);
-    dilationTensorPtr[0] = static_cast<int32_t>(dilationH);
-    dilationTensorPtr[1] = static_cast<int32_t>(dilationW);
+    int32_t stride[2] = {static_cast<int32_t>(strideH), static_cast<int32_t>(strideW)};
+    int32_t padding[4] = {static_cast<int32_t>(padH), static_cast<int32_t>(padH), static_cast<int32_t>(padW), static_cast<int32_t>(padW)};
+    int32_t dilation[2] = {static_cast<int32_t>(dilationH), static_cast<int32_t>(dilationW)};
 
     cnnlDCNDescriptor_t dcnDesc;
     DIOPI_CALL_CNNL(cnnlCreateDCNDescriptor(&dcnDesc));
     DIOPI_CALL_CNNL(cnnlSetDCNDescriptor(dcnDesc,
                                          inputTensor.dim(),
-                                         paddingTensorPtr,
-                                         strideTensorPtr,
-                                         dilationTensorPtr,
+                                         padding,
+                                         stride,
+                                         dilation,
                                          static_cast<int32_t>(deformableGroup),
                                          static_cast<int32_t>(group),
                                          im2colStep,
@@ -261,7 +235,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
                                                         gradOffsetDesc.get(),
                                                         gradMaskDesc.get(),
                                                         &dataWorkspaceSize));
-    void* dataWorkspace = nullptr;
+    void *dataWorkspace = nullptr;
     if (dataWorkspaceSize != 0) {
         dataWorkspace = impl::camb::requiresBuffer(ctx, dataWorkspaceSize).data();
     }
@@ -298,7 +272,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
                                                           gradWeightDesc.get(),
                                                           withBias ? gradBiasDesc.get() : nullptr,
                                                           &weightWorkspaceSize));
-    void* weightWorkspace = nullptr;
+    void *weightWorkspace = nullptr;
     if (weightWorkspaceSize != 0) {
         weightWorkspace = impl::camb::requiresBuffer(ctx, weightWorkspaceSize).data();
     }
@@ -348,8 +322,5 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
         DIOPI_CALL(impl::camb::dataTypeCast(ctx, gradBiasTensor, gradBiasTensorTmp));
     }
 
-    // TODO: add sync here temporarily because the tensors (padding,stride,dilation) are host tensors, and may be released earlier
-    cnrtQueueSync(impl::camb::getStream(ctx));
-
     return diopiSuccess;
 }