Skip to content

Commit

Permalink
Merge branch 'DeepLink-org:main' into improve_buildATen
Browse files Browse the repository at this point in the history
  • Loading branch information
Wrench-Git authored Jun 3, 2024
2 parents 8afcf59 + e1392fe commit 2224093
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 50 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ jobs:
name: Gen-Data-Op-Test-A100
runs-on: tps-diopi-ci
needs: [Build-Nvidia-A100]
if: ${{ contains( needs.Rsync.outputs.output, 'NV' ) }}
steps:
- name: gen-test-data
run: |
Expand Down
2 changes: 0 additions & 2 deletions impl/camb/functions/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,6 @@ static diopiError_t indexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out,
true,
outputDesc.get(),
outputTensor.data()));
// TODO: add sync here temporarily because indicesPtrList is a host tensor, and may be released earlier.
syncStreamInCtx(ctx);
return diopiSuccess;
}

Expand Down
2 changes: 0 additions & 2 deletions impl/camb/functions/index_put.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, di
true,
outputDesc.get(),
outputTensor.data()));
// TODO: add sync here temporarily because indicesPtrList is a host tensor, and may be released earlier.
syncStreamInCtx(ctx);

return diopiSuccess;
}
Expand Down
63 changes: 17 additions & 46 deletions impl/camb/functions_mmcv/modulated_deform_conv_mlu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
outputTensor.size(3) == maskTensor.size(3),
"offset and mask should have the same spatial size as the output of the convolution");

std::vector<impl::camb::DiopiTensor*> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor};
std::vector<impl::camb::DiopiTensor *> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor};
DIOPI_CALL(impl::camb::autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32}));

impl::camb::DiopiTensor outputTensorTmp = outputTensor;
Expand All @@ -61,31 +61,19 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
impl::camb::CnnlTensorDesc biasDesc(biasTensor, CNNL_LAYOUT_ARRAY);

int32_t batchSize = static_cast<int32_t>(inputTensor.size(0));

// im2col_step should be set in cnnl kernel.
int32_t im2colStep = batchSize;
impl::camb::DiopiTensor strideTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
impl::camb::DiopiTensor paddingTensor = impl::camb::requiresTensor(ctx, {4}, diopi_dtype_int32, diopiDevice_t::diopi_host);
impl::camb::DiopiTensor dilationTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
int32_t* strideTensorPtr = (int32_t*)strideTensor.data();
int32_t* paddingTensorPtr = (int32_t*)paddingTensor.data();
int32_t* dilationTensorPtr = (int32_t*)dilationTensor.data();
strideTensorPtr[0] = static_cast<int32_t>(strideH);
strideTensorPtr[1] = static_cast<int32_t>(strideW);
paddingTensorPtr[0] = static_cast<int32_t>(padH);
paddingTensorPtr[1] = static_cast<int32_t>(padH);
paddingTensorPtr[2] = static_cast<int32_t>(padW);
paddingTensorPtr[3] = static_cast<int32_t>(padW);
dilationTensorPtr[0] = static_cast<int32_t>(dilationH);
dilationTensorPtr[1] = static_cast<int32_t>(dilationW);
int32_t stride[2] = {static_cast<int32_t>(strideH), static_cast<int32_t>(strideW)};
int32_t padding[4] = {static_cast<int32_t>(padH), static_cast<int32_t>(padH), static_cast<int32_t>(padW), static_cast<int32_t>(padW)};
int32_t dilation[2] = {static_cast<int32_t>(dilationH), static_cast<int32_t>(dilationW)};

cnnlDCNDescriptor_t dcnDesc;
DIOPI_CALL_CNNL(cnnlCreateDCNDescriptor(&dcnDesc));
DIOPI_CALL_CNNL(cnnlSetDCNDescriptor(dcnDesc,
inputTensor.dim(),
paddingTensorPtr,
strideTensorPtr,
dilationTensorPtr,
padding,
stride,
dilation,
static_cast<int32_t>(deformableGroup),
static_cast<int32_t>(group),
im2colStep,
Expand All @@ -101,7 +89,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
withBias ? biasDesc.get() : nullptr,
outputDesc.get(),
&workspaceSize));
void* workspace = nullptr;
void *workspace = nullptr;
if (workspaceSize != 0) {
workspace = impl::camb::requiresBuffer(ctx, workspaceSize).data();
}
Expand Down Expand Up @@ -130,9 +118,6 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvMmcv(diopiContextHandl
DIOPI_CALL(impl::camb::dataTypeCast(ctx, outputTensor, outputTensorTmp));
}

// TODO: add sync here temporarily because the tensors (padding, stride, dilation) are host tensors, and may be released earlier.
impl::camb::syncStreamInCtx(ctx);

return diopiSuccess;
}

Expand Down Expand Up @@ -168,7 +153,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
gradOutputTensor.size(3) == offsetTensor.size(3) && gradOutputTensor.size(3) == maskTensor.size(3),
"offset and mask should have the same spatial size as the output of the convolution");

std::vector<impl::camb::DiopiTensor*> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor, &gradOutputTensor};
std::vector<impl::camb::DiopiTensor *> tensors{&inputTensor, &weightTensor, &offsetTensor, &maskTensor, &biasTensor, &gradOutputTensor};
DIOPI_CALL(impl::camb::autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32}));

impl::camb::DiopiTensor gradInputTensorTmp = gradInputTensor;
Expand Down Expand Up @@ -222,28 +207,17 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
int32_t batchSize = static_cast<int32_t>(inputTensor.size(0));
// im2col_step should be set in cnnl kernel.
int32_t im2colStep = batchSize;
impl::camb::DiopiTensor strideTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
impl::camb::DiopiTensor paddingTensor = impl::camb::requiresTensor(ctx, {4}, diopi_dtype_int32, diopiDevice_t::diopi_host);
impl::camb::DiopiTensor dilationTensor = impl::camb::requiresTensor(ctx, {2}, diopi_dtype_int32, diopiDevice_t::diopi_host);
int32_t* strideTensorPtr = (int32_t*)strideTensor.data();
int32_t* paddingTensorPtr = (int32_t*)paddingTensor.data();
int32_t* dilationTensorPtr = (int32_t*)dilationTensor.data();
strideTensorPtr[0] = static_cast<int32_t>(strideH);
strideTensorPtr[1] = static_cast<int32_t>(strideW);
paddingTensorPtr[0] = static_cast<int32_t>(padH);
paddingTensorPtr[1] = static_cast<int32_t>(padH);
paddingTensorPtr[2] = static_cast<int32_t>(padW);
paddingTensorPtr[3] = static_cast<int32_t>(padW);
dilationTensorPtr[0] = static_cast<int32_t>(dilationH);
dilationTensorPtr[1] = static_cast<int32_t>(dilationW);
int32_t stride[2] = {static_cast<int32_t>(strideH), static_cast<int32_t>(strideW)};
int32_t padding[4] = {static_cast<int32_t>(padH), static_cast<int32_t>(padH), static_cast<int32_t>(padW), static_cast<int32_t>(padW)};
int32_t dilation[2] = {static_cast<int32_t>(dilationH), static_cast<int32_t>(dilationW)};

cnnlDCNDescriptor_t dcnDesc;
DIOPI_CALL_CNNL(cnnlCreateDCNDescriptor(&dcnDesc));
DIOPI_CALL_CNNL(cnnlSetDCNDescriptor(dcnDesc,
inputTensor.dim(),
paddingTensorPtr,
strideTensorPtr,
dilationTensorPtr,
padding,
stride,
dilation,
static_cast<int32_t>(deformableGroup),
static_cast<int32_t>(group),
im2colStep,
Expand All @@ -261,7 +235,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
gradOffsetDesc.get(),
gradMaskDesc.get(),
&dataWorkspaceSize));
void* dataWorkspace = nullptr;
void *dataWorkspace = nullptr;
if (dataWorkspaceSize != 0) {
dataWorkspace = impl::camb::requiresBuffer(ctx, dataWorkspaceSize).data();
}
Expand Down Expand Up @@ -298,7 +272,7 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
gradWeightDesc.get(),
withBias ? gradBiasDesc.get() : nullptr,
&weightWorkspaceSize));
void* weightWorkspace = nullptr;
void *weightWorkspace = nullptr;
if (weightWorkspaceSize != 0) {
weightWorkspace = impl::camb::requiresBuffer(ctx, weightWorkspaceSize).data();
}
Expand Down Expand Up @@ -348,8 +322,5 @@ extern "C" DIOPI_API diopiError_t diopiModulatedDeformConvBackwardMmcv(
DIOPI_CALL(impl::camb::dataTypeCast(ctx, gradBiasTensor, gradBiasTensorTmp));
}

// TODO: add sync here temporarily because the tensors (padding, stride, dilation) are host tensors, and may be released earlier.
cnrtQueueSync(impl::camb::getStream(ctx));

return diopiSuccess;
}

0 comments on commit 2224093

Please sign in to comment.