Skip to content

Commit

Permalink
Update demoBERT C++ inference for triton
Browse files Browse the repository at this point in the history
Signed-off-by: Rajeev Rao <rajeevrao@nvidia.com>
  • Loading branch information
rajeevsrao committed May 11, 2021
1 parent 718c13d commit faced58
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions demo/BERT/infer_c/bert_infer.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ struct BertInference
exit(-1);
}

- mEngine = TrtUniquePtr<ICudaEngine>(runtime->deserializeCudaEngine(bytes.data(), bytes.size(), nullptr));
+ mEngine = TrtUniquePtr<ICudaEngine>(runtime->deserializeCudaEngine(bytes.data(), bytes.size()));
if (mEngine == nullptr)
{
gLogError << "Error deserializing CUDA engine\n";
Expand Down Expand Up @@ -175,7 +175,7 @@ struct BertInference
{
for (int i = 0; i < kBERT_INPUT_NUM; i++)
{
- mContext->setBindingDimensions(i + bindingIdxOffset, Dims2(mSeqLength, batchSize));
+ mContext->setBindingDimensions(i + bindingIdxOffset, Dims2(batchSize, mSeqLength));
}
}

Expand Down

0 comments on commit faced58

Please sign in to comment.