From 886aff917b63f10a81c5f31e89752a3b46169623 Mon Sep 17 00:00:00 2001 From: Michal Guzek Date: Tue, 8 Oct 2024 10:33:07 -0700 Subject: [PATCH] TensorRT 10.5 GA Release - 2024-10-01 (#998) * Staging 10.5 release Signed-off-by: Michal Guzek * Update minor version in CMakeLists Signed-off-by: Michal Guzek --------- Signed-off-by: Michal Guzek --- CMakeLists.txt | 2 +- ConditionalHelpers.cpp | 31 +- ConditionalHelpers.hpp | 8 +- ImporterContext.hpp | 6 +- ModelImporter.cpp | 503 ++++++++-------- ModelImporter.hpp | 6 +- ModelRefitter.cpp | 84 +-- README.md | 8 +- ShapeTensor.cpp | 7 +- docs/Changelog.md | 6 + docs/operators.md | 2 +- errorHelpers.hpp | 6 +- importerUtils.cpp | 287 ++++----- importerUtils.hpp | 74 +-- onnxOpCheckers.cpp | 5 +- onnxOpImporters.cpp | 1173 +++++++++++++++++++++---------------- onnxProtoUtils.hpp | 10 +- onnx_tensorrt/__init__.py | 2 +- 18 files changed, 1208 insertions(+), 1012 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3955a6b..ad9bc6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}") # Version information #-------------------------------------------------- set(ONNX2TRT_MAJOR 10) -set(ONNX2TRT_MINOR 4) +set(ONNX2TRT_MINOR 5) set(ONNX2TRT_PATCH 0) set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version") diff --git a/ConditionalHelpers.cpp b/ConditionalHelpers.cpp index f51d796..49f6336 100644 --- a/ConditionalHelpers.cpp +++ b/ConditionalHelpers.cpp @@ -34,19 +34,19 @@ SubgraphPortsMap::const_iterator findLayer(const SubgraphPortsMap& inputs, const // Add an ConditionalInputLayer between `layer` and its inputs. // I.e. input[inIdx] -> layer ==> input[inIdx] -> ConditionalInputLayer -> layer. -Status addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, +void addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, nvinfer1::ILayer& layer, int32_t inIdx) { auto input = layer.getInput(inIdx); if (input == nullptr) { // Phantom input (an input that is really constant weights). - return Status::success(); + return; } if (layer.getType() == nvinfer1::LayerType::kCONDITIONAL_OUTPUT) { - return Status::success(); + return; } auto const name = input->getName(); @@ -70,12 +70,11 @@ Status addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* } auto ifOutput = N_CHECK(inputLayer->getOutput(0)); layer.setInput(inIdx, *ifOutput); - return Status::success(); }; // Take a snapshot of the network before and after parsing the subgraph and return a list // of newly added network layers. -Status importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph, +void importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph, std::vector& newLayers, std::vector& subgraphTensors) { auto net = ctx->network(); @@ -85,7 +84,7 @@ Status importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& NameScope nameScope(*ctx); std::vector errors{}; - CHECK_STATUS(onnx2trt::parseGraph(ctx, subgraph, errors)); + onnx2trt::parseGraph(ctx, subgraph, errors); for (int32_t i = 0; i < subgraph.output_size(); ++i) { @@ -97,12 +96,10 @@ Status importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& { newLayers.push_back(net->getLayer(i)); } - - return Status::success(); } // Add an IConditionalInputLayer to `layer`'s inputs, if they don't already exist. 
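The pattern running through `ConditionalHelpers.cpp` above — and most of this patch — is that helpers such as `addConditionalInputLayer` and `importSubgraph` stop returning `Status` and instead report failure by throwing, which removes `CHECK_STATUS`-style propagation at every call site. A minimal, self-contained sketch of the before/after control flow, using toy types rather than the parser's real ones:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Toy stand-in for the parser's Status type (hypothetical, for illustration only).
struct Status
{
    bool ok;
    std::string msg;
    static Status success() { return {true, ""}; }
};

// Old style: every level returns a Status that each caller must check and forward.
Status importSubgraphOld(bool valid)
{
    if (!valid) { return {false, "failed to import subgraph"}; }
    return Status::success();
}

// New style: a void function that throws; intermediate callers need no checks.
void importSubgraphNew(bool valid)
{
    if (!valid) { throw std::runtime_error("failed to import subgraph"); }
}

int main()
{
    Status s = importSubgraphOld(false);       // must be checked explicitly
    if (!s.ok) { std::cerr << s.msg << "\n"; }

    try                                        // one catch at the API boundary
    {
        importSubgraphNew(false);
    }
    catch (std::exception const& e)
    {
        std::cerr << e.what() << "\n";
    }
    return 0;
}
```

The payoff is visible in the hunks above: intermediate callers shrink to plain calls, and only the outermost API boundary needs a handler.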
-Status addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, +void addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, nvinfer1::ILayer& layer, SubgraphPortsMap subgraphInputsMap) { // Return all of the layer's inputs that are external to the subgraph that @@ -125,11 +122,10 @@ Status addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditiona LOG_VERBOSE("Adding Input layer for " << layer.getName()); addConditionalInputLayer(ctx, conditional, inputsMap, layer, inIdx); } - return Status::success(); } // Add IConditionalInputLayers to `layer`'s inputs. -Status addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, +void addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, const std::vector& newLayers) { // Find all of the tensors entering the subgraph. @@ -143,12 +139,10 @@ Status addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditio { addConditionalInputIfNeeded(ctx, conditional, inputsMap, *layer, subgraphInputsMap); } - - return Status::success(); } // Given a subgraph, find all of its external inputs/outputs (tensors entering/exiting the subgraph). -Status getSubgraphTensors(const std::vector& newLayers, +void getSubgraphTensors(const std::vector& newLayers, std::unordered_map>& externalOutputs, bool extractOutputs, const std::vector* reportedOutputs = nullptr) { @@ -255,20 +249,19 @@ Status getSubgraphTensors(const std::vector& newLayers, externalOutputs[tensor].insert(portIndex); } } - return Status::success(); } -Status getSubgraphOutputs(const std::vector& newLayers, +void getSubgraphOutputs(const std::vector& newLayers, std::unordered_map>& externalOutputs, const std::vector& reportedOutputs) { - return getSubgraphTensors(newLayers, externalOutputs, true, &reportedOutputs); + getSubgraphTensors(newLayers, externalOutputs, true, &reportedOutputs); } -Status getSubgraphInputs(const std::vector& newLayers, +void getSubgraphInputs(const std::vector& newLayers, std::unordered_map>& externalInputs) { - return getSubgraphTensors(newLayers, externalInputs, false); + getSubgraphTensors(newLayers, externalInputs, false); } } // namespace onnx2trt diff --git a/ConditionalHelpers.hpp b/ConditionalHelpers.hpp index d4e763d..1a317f0 100644 --- a/ConditionalHelpers.hpp +++ b/ConditionalHelpers.hpp @@ -21,25 +21,25 @@ namespace onnx2trt // Given a subgraph, find all of its external inputs (tensors entering the subgraph). // The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor entering the subgraph) and // with values indicating a set of external input indices. -Status getSubgraphInputs(std::vector const& newLayers, +void getSubgraphInputs(std::vector const& newLayers, std::unordered_map>& subgraphInputs); // Given a subgraph, find all of its external outputs (tensors exiting the subgraph). // The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor exiting the subgraph) and // with values indicating a set of external outputs indices. -Status getSubgraphOutputs(const std::vector& newLayers, +void getSubgraphOutputs(const std::vector& newLayers, std::unordered_map>& subgraphOutputs, const std::vector& reportedOutputs); // Take a snapshot of the network before and after parsing the subgraph and return a list // of newly added network layers. 
-Status importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph, +void importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph, std::vector& newLayers, std::vector& subgraphTensors); using InputsMap = std::unordered_map; // Add IIfConditionalInputLayers to the inputs of the subgraph indicated by `subgraph`. -onnx2trt::Status addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, +void addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap, const std::vector& newLayers); } // namespace onnx2trt diff --git a/ImporterContext.hpp b/ImporterContext.hpp index fd3bb9a..7d9909a 100644 --- a/ImporterContext.hpp +++ b/ImporterContext.hpp @@ -371,9 +371,9 @@ class ImporterContext } }; -typedef ValueOrStatus> NodeImportResult; -typedef std::function& inputs)> +typedef std::vector NodeOutputs; +typedef std::function& inputs)> NodeImporter; typedef std::function const& tensors, std::vector const& locations) { - ASSERT((tensors.size() >= locations.size()) + ONNXTRT_CHECK((tensors.size() >= locations.size()) && "The size of tensors misaligns with the size of the attribute trt_outputs_loc.", nvonnxparser::ErrorCode::kINVALID_GRAPH); for (size_t i = 0; i < locations.size(); ++i) @@ -48,7 +50,7 @@ Status setTensorLocations( if (ctx->tensorLocations().count(tensor) > 0) { - ASSERT((ctx->tensorLocations()[tensor] == loc) && "The tensor location cannot be changed.", + ONNXTRT_CHECK((ctx->tensorLocations()[tensor] == loc) && "The tensor location cannot be changed.", nvonnxparser::ErrorCode::kINVALID_GRAPH); } else @@ -56,16 +58,14 @@ Status setTensorLocations( ctx->tensorLocations()[tensor] = loc; } } - - return Status::success(); } // Helper for deserializing INetwork template -Status setStringMap( +void setStringMap( ImporterContext* ctx, std::vector const& tensors, std::vector const& data, StringMap& map) { - ASSERT((tensors.size() >= data.size()) + ONNXTRT_CHECK((tensors.size() >= data.size()) && "The size of tensors misaligns with the size of the attribute trt_outputs_range_min/max.", nvonnxparser::ErrorCode::kINVALID_GRAPH); for (size_t i = 0; i < data.size(); ++i) @@ -74,14 +74,13 @@ Status setStringMap( T dataName = data.at(i); if (map.count(name) > 0) { - ASSERT( (map[name] == dataName) && "The order of tensorRangeMin/Max in context misaligns with the order of the attribute trt_outputs_range_min/max.", nvonnxparser::ErrorCode::kINVALID_GRAPH); + ONNXTRT_CHECK( (map[name] == dataName) && "The order of tensorRangeMin/Max in context misaligns with the order of the attribute trt_outputs_range_min/max.", nvonnxparser::ErrorCode::kINVALID_GRAPH); } else { map[name] = dataName; } } - return Status::success(); } //! Make error explanation from TensorRT error recorder. 
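The `ImporterContext.hpp` hunk above makes the same shift at the type level: `NodeImportResult`, which bundled outputs with a `Status`, becomes a plain `NodeOutputs` vector, and a `NodeImporter` now signals failure by throwing. A toy dispatch table in that style — stand-in types, not the real `TensorOrWeights` or importer signature:

```cpp
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

struct TensorOrWeightsToy {};                        // stand-in for TensorOrWeights
using NodeOutputs = std::vector<TensorOrWeightsToy>; // mirrors the new typedef
using NodeImporter = std::function<NodeOutputs(std::string const& op)>;

int main()
{
    std::unordered_map<std::string, NodeImporter> importers;
    importers["Relu"] = [](std::string const&) -> NodeOutputs {
        return {TensorOrWeightsToy{}};               // success: just return the outputs
    };
    importers["Bad"] = [](std::string const& op) -> NodeOutputs {
        throw std::runtime_error("cannot import " + op); // failure: throw, no Status
    };

    for (auto const& op : {"Relu", "Bad"})
    {
        try
        {
            NodeOutputs outs = importers.at(op)(op);
            std::cout << op << ": " << outs.size() << " output(s)\n";
        }
        catch (std::exception const& e)
        {
            std::cerr << op << ": " << e.what() << "\n";
        }
    }
    return 0;
}
```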
@@ -109,7 +108,7 @@ static std::string makeErrorExplanation(std::exception const& e, std::string con return result.str(); } -Status parseNode( +void parseNode( ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, bool deserializingINetwork) { // For nodes that contain subgraphs (Ifs, Loops, Scans, LocalFunctions), ensure that the recursion depth is @@ -117,8 +116,8 @@ Status parseNode( size_t const kMAX_NESTED_SUBGRAPHS = 24; if (ctx->getNestedDepth() > kMAX_NESTED_SUBGRAPHS) { - ASSERT(false && "ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", - ErrorCode::kUNSUPPORTED_GRAPH); + ONNXTRT_THROW(MAKE_ERROR("ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", + ErrorCode::kUNSUPPORTED_GRAPH)); } StringMap const& opImporters = getBuiltinOpImporterMap(); std::string const& nodeName = getNodeName(node); @@ -141,11 +140,12 @@ Status parseNode( else { LOG_VERBOSE("Searching for input: " << inputName); - ASSERT_NODE((ctx->tensors().count(inputName)), "Node input was not registered.", node, nodeIdx, + ONNXTRT_CHECK_NODE((ctx->tensors().count(inputName)), "Node input was not registered.", node, nodeIdx, ErrorCode::kINVALID_GRAPH); nodeInputs.push_back(ctx->tensors().at(inputName)); ssInputs << "[" << inputName << " -> " << nodeInputs.back().shape() << "[" << nodeInputs.back().getType() - << "]" << "], "; + << "]" + << "], "; } } LOG_VERBOSE(ssInputs.str()); @@ -166,19 +166,19 @@ Status parseNode( LOG_INFO("No importer registered for op: " << nodeType << ". Attempting to import as plugin."); importFunc = &opImporters.at("FallbackPluginImporter"); } - std::vector outputs; + std::vector outputs; try { - GET_VALUE((*importFunc)(ctx, node, nodeIdx, nodeInputs), &outputs); + outputs = (*importFunc)(ctx, node, nodeIdx, nodeInputs); } - catch (std::exception const& e) + catch (OnnxTrtException& e) { - return MAKE_NODE_ERROR(makeErrorExplanation(e, nodeName), ErrorCode::kINTERNAL_ERROR, node, nodeIdx); + throw e; } - if (ctx->hasError()) + catch (std::exception& e) { - return MAKE_NODE_ERROR(makeErrorExplanation(ctx, nodeName), ErrorCode::kINVALID_NODE, node, nodeIdx); + ONNXTRT_THROW(MAKE_NODE_ERROR(makeErrorExplanation(ctx, nodeName), ErrorCode::kINVALID_NODE, node, nodeIdx)); } ctx->addLayerOutputTensors(nodeName, outputs); @@ -189,10 +189,12 @@ Status parseNode( // check that we can resolve output dims // in the future we may have a network/layer.validate() which will help with that as well output.tensor().getDimensions(); + // If output dimensions cannot be resolved the error will be captured by the ErrorRecorder. 
if (ctx->hasError()) { - return MAKE_NODE_ERROR(makeErrorExplanation(ctx, nodeName), ErrorCode::kINVALID_NODE, node, nodeIdx); + ONNXTRT_THROW( + MAKE_NODE_ERROR(makeErrorExplanation(ctx, nodeName), ErrorCode::kINVALID_NODE, node, nodeIdx)); } } } @@ -205,21 +207,21 @@ Status parseNode( std::vector outputsLocation = attrs.get>("trt_outputs_loc", {}); std::vector outputsVec(node.output().begin(), node.output().end()); std::vector layerName{nodeName}; - CHECK_STATUS(setTensorLocations(ctx, outputsVec, outputsLocation)); + setTensorLocations(ctx, outputsVec, outputsLocation); auto outputsRangeMin = attrs.get>("trt_outputs_range_min", {}); - CHECK_STATUS(setStringMap(ctx, outputsVec, outputsRangeMin, ctx->tensorRangeMins())); + setStringMap(ctx, outputsVec, outputsRangeMin, ctx->tensorRangeMins()); auto outputsRangeMax = attrs.get>("trt_outputs_range_max", {}); - CHECK_STATUS(setStringMap(ctx, outputsVec, outputsRangeMax, ctx->tensorRangeMaxes())); + setStringMap(ctx, outputsVec, outputsRangeMax, ctx->tensorRangeMaxes()); if (attrs.count("trt_layer_precision")) { std::vector layerPrecision{attrs.get("trt_layer_precision")}; - CHECK_STATUS(setStringMap(ctx, layerName, layerPrecision, ctx->layerPrecisions())); + setStringMap(ctx, layerName, layerPrecision, ctx->layerPrecisions()); } } - ASSERT_NODE((node.output().size() <= static_cast(outputs.size())), + ONNXTRT_CHECK_NODE((node.output().size() <= static_cast(outputs.size())), "Node has more output tensors than TRT expected, expected output size is " << outputs.size() << ", actual output size is " << node.output().size() << ".", node, nodeIdx, ErrorCode::kINVALID_GRAPH); @@ -250,12 +252,11 @@ Status parseNode( legalUINT8 = true; } } - ASSERT_NODE(legalUINT8, "TensorRT does not support UINT8 types for intermediate tensors!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(legalUINT8, "TensorRT does not support UINT8 types for intermediate tensors!", node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } } LOG_VERBOSE(ssOutputs.str()); - return Status::success(); } void parseNodeStaticCheck( @@ -296,7 +297,7 @@ void parseNodeStaticCheck( (*checkerFunc)(ctx, node, errors, nodeIndex); } -Status parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& graph, std::vector& errors, +void parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& graph, std::vector& errors, bool deserializingINetwork, int* currentNode) { // Import initializers. 
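The rewritten catch blocks in `parseNode` above distinguish the parser's own typed exception (passed through to the caller) from any other `std::exception` (converted into an error that carries the failing node's identity). A compact sketch of that pattern, with a hypothetical `NodeError` standing in for `OnnxTrtException`:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for OnnxTrtException: already carries node context.
struct NodeError : std::runtime_error
{
    int nodeIdx;
    NodeError(std::string const& what, int idx) : std::runtime_error(what), nodeIdx(idx) {}
};

void importNode(int nodeIdx, bool typedFailure)
{
    try
    {
        if (typedFailure) { throw NodeError("unsupported attribute", nodeIdx); }
        throw std::out_of_range("vector::at");    // e.g. a stray library error
    }
    catch (NodeError&)
    {
        throw;                                    // already annotated: pass through
    }
    catch (std::exception& e)
    {
        // Wrap generic failures so the caller still learns which node broke.
        throw NodeError(std::string("node failed: ") + e.what(), nodeIdx);
    }
}

int main()
{
    for (bool typedFailure : {true, false})
    {
        try { importNode(7, typedFailure); }
        catch (NodeError const& e) { std::cerr << "[node " << e.nodeIdx << "] " << e.what() << "\n"; }
    }
    return 0;
}
```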
@@ -306,7 +307,7 @@ Status parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& grap
 {
 LOG_VERBOSE("Importing initializer: " << initializer.name());
 ShapedWeights weights;
- ASSERT(
+ ONNXTRT_CHECK(
 ctx->getWeightsContext().convertOnnxWeights(initializer, &weights) && "Failed to import initializer.",
 ErrorCode::kUNSUPPORTED_NODE);
 ctx->registerTensor(TensorOrWeights{std::move(weights)}, initializer.name());
@@ -314,7 +315,7 @@ Status parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& grap
 }
 catch (const std::exception& e)
 {
- ASSERT(false && "Failed to import initialzer", ErrorCode::kINVALID_GRAPH);
+ ONNXTRT_THROW(MAKE_ERROR("Failed to import initializer", ErrorCode::kINVALID_GRAPH));
 }

 // Keep track of graph outputs in the context to validate UINT8 nodes
@@ -324,7 +325,8 @@ Status parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& grap
 }

 std::vector topoOrder;
- ASSERT(toposort(graph.node(), &topoOrder) && "Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH);
+ ONNXTRT_CHECK(
+ toposort(graph.node(), &topoOrder) && "Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH);

 for (auto const& nodeIndex : topoOrder)
 {
@@ -336,22 +338,21 @@ Status parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& grap
 if (errors.size() == 0)
 {
 // At most one dynamic error will be returned.
- auto status = parseNode(ctx, graph.node(nodeIndex), nodeIndex, deserializingINetwork);
- if (status.is_error())
- {
- errors.push_back(status);
- }
+ parseNode(ctx, graph.node(nodeIndex), nodeIndex, deserializingINetwork);
 }
 }
+
+ // Static check still reports error through the error vector by design
 if (errors.size() != 0)
 {
 auto result = errors.back();
 errors.pop_back(); // this error will be added back to the list in ModelImporter::parseWithWeightDescriptors.
- return result;
+
+ ONNXTRT_THROW(result);
 }
- return Status::success();
 }

+// Still returns a vector since CHECK_INPUT doesn't immediately return
 std::vector importInput(ImporterContext* ctx, ::ONNX_NAMESPACE::ValueInfoProto const& input, nvinfer1::ITensor** tensor, std::vector& namedDims)
 {
@@ -384,17 +385,16 @@ std::vector importInput(ImporterContext* ctx, ::ONNX_NAMESPACE::ValueInf
 return errorList;
 }

-static Status setDimensionNames(ImporterContext* ctx, std::vector& namedDims)
+static void setDimensionNames(ImporterContext* ctx, std::vector& namedDims)
 {
 for (auto const& namedDim : namedDims)
 {
 namedDim.tensor->setDimensionName(namedDim.index, namedDim.dimParam.c_str());
 }
- return Status::success();
 }

-Status importInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& graph,
- StringMap* tensors, std::vector& errors)
+void importInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& graph, StringMap* tensors,
+ std::vector& errors)
 {
 // The weights come from the Initializer list in onnx graph
 // Initializers are not really network inputs, so they need to be excluded.
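The exclusion described in the comment above is a set-difference over names: anything in `graph.input()` whose name also appears in the initializer list is a weight, not a true network input. A minimal sketch, assuming plain string containers instead of the protobuf types:

```cpp
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main()
{
    // graph.input() lists real inputs *and* initializers; only names absent
    // from the initializer set are true network inputs.
    std::vector<std::string> graphInputs{"images", "conv1.weight", "conv1.bias"};
    std::unordered_set<std::string> initializers{"conv1.weight", "conv1.bias"};

    for (auto const& name : graphInputs)
    {
        if (initializers.count(name) == 0)
        {
            std::cout << "network input: " << name << "\n"; // prints: images
        }
    }
    return 0;
}
```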
@@ -425,18 +425,17 @@ Status importInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& gr if (!statusList.empty()) { errors.insert(errors.end(), statusList.begin(), statusList.end()); - return statusList[0]; + return; } - return setDimensionNames(ctx, namedDims); + setDimensionNames(ctx, namedDims); } -Status importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto const& model) +void importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto const& model) { for (auto const& localFunction : model.functions()) { ctx->localFunctions().insert({localFunction.name(), localFunction}); } - return Status::success(); } // Internal helper function used for ONNXRT-TRT EP to filter out DDS nodes @@ -454,13 +453,7 @@ std::pair ModelImporter::doSupport void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path) { ::ONNX_NAMESPACE::ModelProto model; - Status status = deserializeOnnxModel(serialized_onnx_model, serialized_onnx_model_size, &model); - - if (status.is_error()) - { - mErrors.push_back(status); - return std::make_pair(false, {}); - } + deserializeOnnxModel(serialized_onnx_model, serialized_onnx_model_size, &model); if (model_path) { @@ -624,8 +617,8 @@ bool ModelImporter::isSubgraphSupported(int64_t const index) noexcept errorMessage << "Query index " << index << " exceeds subgraph support vector (size = " << mSubGraphSupportVector.size() << "). Have you called supports_model_v2?"; - ONNXTRT_CHECK(mSubGraphSupportVector.size() > static_cast(index), - MAKE_ERROR(errorMessage.str(), ErrorCode::kINVALID_VALUE)); + ONNXTRT_CHECK(mSubGraphSupportVector.size() > static_cast(index) && errorMessage.str().c_str(), + ErrorCode::kINVALID_VALUE); return mSubGraphSupportVector[index].second; } ONNXTRT_CATCH_RECORD @@ -640,8 +633,8 @@ int64_t* ModelImporter::getSubgraphNodes(int64_t const index, int64_t& subgraphL errorMessage << "Query index " << index << " exceeds subgraph support vector (size = " << mSubGraphSupportVector.size() << "). 
Have you called supports_model_v2?"; - ONNXTRT_CHECK(mSubGraphSupportVector.size() > static_cast(index), - MAKE_ERROR(errorMessage.str(), ErrorCode::kINVALID_VALUE)); + ONNXTRT_CHECK(mSubGraphSupportVector.size() > static_cast(index) && errorMessage.str().c_str(), + ErrorCode::kINVALID_VALUE); subgraphLength = mSubGraphSupportVector[index].first.size(); return mSubGraphSupportVector[index].first.data(); } @@ -673,18 +666,8 @@ bool ModelImporter::parseWithWeightDescriptors( // Note: We store a copy of the model so that weight arrays will persist mONNXModels.emplace_back(); ::ONNX_NAMESPACE::ModelProto& model = mONNXModels.back(); - Status status = deserializeOnnxModel(serialized_onnx_model, serialized_onnx_model_size, &model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } - status = this->importModel(model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + deserializeOnnxModel(serialized_onnx_model, serialized_onnx_model_size, &model); + importModel(model); return true; } ONNXTRT_CATCH_RECORD @@ -715,245 +698,253 @@ bool ModelImporter::parse( return false; } -Status ModelImporter::importModel(::ONNX_NAMESPACE::ModelProto const& model) noexcept +void ModelImporter::importModel(::ONNX_NAMESPACE::ModelProto const& model) { - ONNXTRT_TRY - { - auto* ctx = &mImporterCtx; - mImporterCtx.clearOpsets(); - // Add domain import limit for security reasons - int32_t const MAX_DOMAINS = 1024; - ASSERT(model.opset_import().size() <= MAX_DOMAINS - && "Model contains more than 1024 domains! Parsing will halt for security reasons.", - ErrorCode::kUNSUPPORTED_GRAPH); - for (int32_t i = 0; i < model.opset_import().size(); ++i) - { - - std::string domain = model.opset_import(i).domain(); - int64_t version = model.opset_import(i).version(); - // TensorRT requires an ONNX graph to be generated with at least ai.onnx version 7. - // ONNX spec says that the default domain is either an empty string or is "ai.onnx". - if ((domain.empty() || domain == "ai.onnx") && version < 7) - { - LOG_WARNING( - "TensorRT supports ONNX graphs generated with at least opset 7. Models using older opsets are not " - "guaranteed to work."); - } - mImporterCtx.addOpset(domain, version); - } - ::ONNX_NAMESPACE::GraphProto const& graph = model.graph(); - // Create a dummy tensors so that we can reserve output names. If the output names are encountered elsewhere - // in the graph, the ctx will know to make the names unique. - for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) + auto* ctx = &mImporterCtx; + mImporterCtx.clearOpsets(); + // Add domain import limit for security reasons + int32_t const MAX_DOMAINS = 1024; + ONNXTRT_CHECK(model.opset_import().size() <= MAX_DOMAINS + && "Model contains more than 1024 domains! Parsing will halt for security reasons.", + ErrorCode::kUNSUPPORTED_GRAPH); + for (int32_t i = 0; i < model.opset_import().size(); ++i) + { + std::string domain = model.opset_import(i).domain(); + int64_t version = model.opset_import(i).version(); + // TensorRT requires an ONNX graph to be generated with at least ai.onnx version 7. + // ONNX spec says that the default domain is either an empty string or is "ai.onnx". + if ((domain.empty() || domain == "ai.onnx") && version < 7) { - mImporterCtx.registerTensor(TensorOrWeights{}, output.name()); + LOG_WARNING( + "TensorRT supports ONNX graphs generated with at least opset 7. 
Models using older opsets are not " + "guaranteed to work."); } + mImporterCtx.addOpset(domain, version); + } + ::ONNX_NAMESPACE::GraphProto const& graph = model.graph(); + // Create a dummy tensors so that we can reserve output names. If the output names are encountered elsewhere + // in the graph, the ctx will know to make the names unique. + for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) + { + mImporterCtx.registerTensor(TensorOrWeights{}, output.name()); + } - // Import LocalFunctions - CHECK_STATUS(importLocalFunctions(&mImporterCtx, model)); + // Import LocalFunctions + importLocalFunctions(&mImporterCtx, model); - // Propagate OnnxParserFlags down to the importer context. - mImporterCtx.setFlags(getFlags()); + // Propagate OnnxParserFlags down to the importer context. + mImporterCtx.setFlags(getFlags()); - mCurrentNode = -1; - importInputs(&mImporterCtx, graph, &mImporterCtx.tensors(), mErrors); - CHECK_STATUS(parseGraph(&mImporterCtx, graph, mErrors, model.producer_name() == "TensorRT", &mCurrentNode)); + mCurrentNode = -1; + importInputs(&mImporterCtx, graph, &mImporterCtx.tensors(), mErrors); + parseGraph(&mImporterCtx, graph, mErrors, model.producer_name() == "TensorRT", &mCurrentNode); - mCurrentNode = -1; - // Mark outputs defined in the ONNX model (unless tensors are user-requested) - for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) + mCurrentNode = -1; + // Mark outputs defined in the ONNX model (unless tensors are user-requested) + for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) + { + ONNXTRT_CHECK((mImporterCtx.tensors().count(output.name())) && "The output tensor was not registered.", + ErrorCode::kINVALID_GRAPH); + nvinfer1::ITensor* output_tensor_ptr + = &convertToTensor(mImporterCtx.tensors().at(output.name()), &mImporterCtx); + LOG_VERBOSE("Marking " << output_tensor_ptr->getName() << " as output: " << output.name()); + output_tensor_ptr->setName(output.name().c_str()); + + if (output_tensor_ptr->isNetworkInput()) { - ASSERT((mImporterCtx.tensors().count(output.name())) && "The output tensor was not registered.", - ErrorCode::kINVALID_GRAPH); - nvinfer1::ITensor* output_tensor_ptr - = &convertToTensor(mImporterCtx.tensors().at(output.name()), &mImporterCtx); - LOG_VERBOSE("Marking " << output_tensor_ptr->getName() << " as output: " << output.name()); + // HACK WAR for TRT not allowing input == output + // TODO: Does this break things by changing the name of the input tensor? 
+ output_tensor_ptr->setName(("__" + output.name()).c_str());
+ output_tensor_ptr = &identity(&mImporterCtx, output_tensor_ptr).tensor();
+ ONNXTRT_CHECK(output_tensor_ptr && "Failed to add an Identity layer.", ErrorCode::kUNSUPPORTED_NODE);
 output_tensor_ptr->setName(output.name().c_str());
+ }

+ mImporterCtx.network()->markOutput(*output_tensor_ptr);
+ nvinfer1::DataType output_trt_dtype;
+
+ ONNXTRT_CHECK(convertDtype(output.type().tensor_type().elem_type(), &output_trt_dtype)
+ && "Failed to convert ONNX data type to TensorRT data type.",
+ ErrorCode::kUNSUPPORTED_NODE);
+ // For INT32 data type, output type must match tensor type
+ ONNXTRT_CHECK((output_tensor_ptr->getType() != nvinfer1::DataType::kINT32
+ || output_trt_dtype == nvinfer1::DataType::kINT32)
+ && "For INT32 tensors, the output type must also be INT32.",
+ ErrorCode::kUNSUPPORTED_NODE);
+ // Note: Without this, output type is always float32
+ output_tensor_ptr->setType(output_trt_dtype);
+ if (output_trt_dtype == nvinfer1::DataType::kINT64)
+ {
+ LOG_WARNING("Make sure output " << output.name() << " has Int64 binding.");
+ }
+ }
+
+ if (model.producer_name() == "TensorRT")
+ {
+ // iterate over all tensors in the network and add them to "tensors" map
+ StringMap tensors;
+ StringMap layers;
+ for (int32_t idx = 0; idx < mImporterCtx.network()->getNbInputs(); ++idx)
+ {
+ nvinfer1::ITensor* tensor = mImporterCtx.network()->getInput(idx);
+ if (tensor != nullptr)
 {
- // HACK WAR for TRT not allowing input == output
- // TODO: Does this break things by changing the name of the input tensor?
- output_tensor_ptr->setName(("__" + output.name()).c_str());
- output_tensor_ptr = &identity(&mImporterCtx, output_tensor_ptr).tensor();
- ASSERT(output_tensor_ptr && "Failed to add an Identity layer.", ErrorCode::kUNSUPPORTED_NODE);
- output_tensor_ptr->setName(output.name().c_str());
+ tensors[tensor->getName()] = tensor;
 }
-
- mImporterCtx.network()->markOutput(*output_tensor_ptr);
- nvinfer1::DataType output_trt_dtype;
- ASSERT(convertDtype(output.type().tensor_type().elem_type(), &output_trt_dtype)
- && "Failed to convert ONNX date type to TensorRT data type.",
- ErrorCode::kUNSUPPORTED_NODE);
- // For INT32 data type, output type must match tensor type
- ASSERT((output_tensor_ptr->getType() != nvinfer1::DataType::kINT32
- || output_trt_dtype == nvinfer1::DataType::kINT32)
- && "For INT32 tensors, the output type must also be INT32.",
- ErrorCode::kUNSUPPORTED_NODE);
- // Note: Without this, output type is always float32
- output_tensor_ptr->setType(output_trt_dtype);
- if (output_trt_dtype == nvinfer1::DataType::kINT64)
+ }
+ for (int32_t idx = 0; idx < mImporterCtx.network()->getNbOutputs(); ++idx)
+ {
+ nvinfer1::ITensor* tensor = mImporterCtx.network()->getOutput(idx);
+ if (tensor != nullptr)
 {
- LOG_WARNING("Make sure output " << output.name() << " has Int64 binding.");
+ tensors[tensor->getName()] = tensor;
 }
 }
-
- if (model.producer_name() == "TensorRT")
+ for (int32_t layerIdx = 0; layerIdx < mImporterCtx.network()->getNbLayers(); ++layerIdx)
 {
- // iterate over all tensors in the network and add them to "tensors" map
- StringMap tensors;
- StringMap layers;
- for (int32_t idx = 0; idx < mImporterCtx.network()->getNbInputs(); ++idx)
+ nvinfer1::ILayer* layer = mImporterCtx.network()->getLayer(layerIdx);
+ for (int32_t idx = 0; idx < layer->getNbInputs(); ++idx)
 {
- nvinfer1::ITensor* tensor = mImporterCtx.network()->getInput(idx);
+ nvinfer1::ITensor* tensor =
layer->getInput(idx); if (tensor != nullptr) { tensors[tensor->getName()] = tensor; } } - for (int32_t idx = 0; idx < mImporterCtx.network()->getNbOutputs(); ++idx) + for (int32_t idx = 0; idx < layer->getNbOutputs(); ++idx) { - nvinfer1::ITensor* tensor = mImporterCtx.network()->getOutput(idx); + nvinfer1::ITensor* tensor = layer->getOutput(idx); if (tensor != nullptr) { tensors[tensor->getName()] = tensor; } } - for (int32_t layerIdx = 0; layerIdx < mImporterCtx.network()->getNbLayers(); ++layerIdx) - { - nvinfer1::ILayer* layer = mImporterCtx.network()->getLayer(layerIdx); - for (int32_t idx = 0; idx < layer->getNbInputs(); ++idx) - { - nvinfer1::ITensor* tensor = layer->getInput(idx); - if (tensor != nullptr) - { - tensors[tensor->getName()] = tensor; - } - } - for (int32_t idx = 0; idx < layer->getNbOutputs(); ++idx) - { - nvinfer1::ITensor* tensor = layer->getOutput(idx); - if (tensor != nullptr) - { - tensors[tensor->getName()] = tensor; - } - } - layers[layer->getName()] = layer; - } + layers[layer->getName()] = layer; + } - // Set locations for all tensors - for (auto const& tensor : ctx->tensorLocations()) + // Set locations for all tensors + for (auto const& tensor : ctx->tensorLocations()) + { + ONNXTRT_CHECK((tensors.count(tensor.first) > 0) && "The tensor does not have an assigned location.", + nvonnxparser::ErrorCode::kINVALID_GRAPH); + tensors.at(tensor.first)->setLocation(tensor.second); + } + // Set dynamic range for all tensors + for (auto const& tensor : ctx->tensorRangeMins()) + { + // if there's a min range, there must be a max range as well + ONNXTRT_CHECK((tensors.count(tensor.first) > 0) && "The tensor does not have an assigned location.", + nvonnxparser::ErrorCode::kINVALID_GRAPH); + if (!std::isnan(tensor.second)) { - ASSERT((tensors.count(tensor.first) > 0) && "The tensor does not have an assigned location.", - nvonnxparser::ErrorCode::kINVALID_GRAPH); - tensors.at(tensor.first)->setLocation(tensor.second); + tensors.at(tensor.first)->setDynamicRange(tensor.second, ctx->tensorRangeMaxes().at(tensor.first)); } - // Set dynamic range for all tensors - for (auto const& tensor : ctx->tensorRangeMins()) + } + // Avoid setting layer precision if graph is strongly typed. + if (!ctx->network()->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED)) + { + // Set precisions for all layers. + for (auto const& layer : ctx->layerPrecisions()) { - // if there's a min range, there must be a max range as well - ASSERT((tensors.count(tensor.first) > 0) && "The tensor does not have an assigned location.", + ONNXTRT_CHECK((layers.count(layer.first) > 0) && "The layer does not have an assigned precision.", nvonnxparser::ErrorCode::kINVALID_GRAPH); - if (!std::isnan(tensor.second)) - { - tensors.at(tensor.first)->setDynamicRange(tensor.second, ctx->tensorRangeMaxes().at(tensor.first)); - } - } - // Avoid setting layer precision if graph is strongly typed. - if (!ctx->network()->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED)) - { - // Set precisions for all layers. 
- for (auto const& layer : ctx->layerPrecisions()) - { - ASSERT((layers.count(layer.first) > 0) && "The layer does not have an assigned precision.", - nvonnxparser::ErrorCode::kINVALID_GRAPH); - layers.at(layer.first)->setPrecision(layer.second); - } + layers.at(layer.first)->setPrecision(layer.second); } } + } - // Regenerate the plugin library list - mPluginLibraryList = ctx->getUsedVCPluginLibraries(); - mPluginLibraryListCStr.clear(); - mPluginLibraryListCStr.reserve(mPluginLibraryList.size()); - for (auto const& s : mPluginLibraryList) - { - mPluginLibraryListCStr.push_back(s.c_str()); - } - - return Status::success(); + // Regenerate the plugin library list + mPluginLibraryList = ctx->getUsedVCPluginLibraries(); + mPluginLibraryListCStr.clear(); + mPluginLibraryListCStr.reserve(mPluginLibraryList.size()); + for (auto const& s : mPluginLibraryList) + { + mPluginLibraryListCStr.push_back(s.c_str()); } - ONNXTRT_CATCH_RECORD - return Status{ErrorCode::kINTERNAL_ERROR}; } bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) noexcept { - auto* ctx = &mImporterCtx; + ONNXTRT_TRY + { + auto* ctx = &mImporterCtx; - // Define S_ISREG macro for Windows + // Define S_ISREG macro for Windows #if !defined(S_ISREG) #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) #endif - struct stat sb; - if (stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)) - { - LOG_ERROR("Input is not a regular file: " << onnxModelFile); - return false; - } - - GOOGLE_PROTOBUF_VERIFY_VERSION; + struct stat sb; + if (stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)) + { + LOG_ERROR("Input is not a regular file: " << onnxModelFile); + return false; + } - // Own the ONNX model for weights to persist. - mONNXModels.emplace_back(); - ::ONNX_NAMESPACE::ModelProto& onnxModel = mONNXModels.back(); - bool const fileLoadSuccess = ParseFromFileAsBinary(&onnxModel, onnxModelFile); - if (!fileLoadSuccess) - { - LOG_ERROR("Failed to parse ONNX model from file: " << onnxModelFile << "!"); - return false; - } + GOOGLE_PROTOBUF_VERIFY_VERSION; - // Keep track of the absolute path to the ONNX file. - mImporterCtx.setOnnxFileLocation(onnxModelFile); + // Own the ONNX model for weights to persist. + mONNXModels.emplace_back(); + ::ONNX_NAMESPACE::ModelProto& onnxModel = mONNXModels.back(); + bool const fileLoadSuccess = ParseFromFileAsBinary(&onnxModel, onnxModelFile); + if (!fileLoadSuccess) + { + LOG_ERROR("Failed to parse ONNX model from file: " << onnxModelFile << "!"); + return false; + } - int64_t const opset_version = (onnxModel.opset_import().size() ? onnxModel.opset_import(0).version() : 0); - LOG_INFO("----------------------------------------------------------------"); - LOG_INFO("Input filename: " << onnxModelFile); - LOG_INFO("ONNX IR version: " << onnxIRVersionAsString(onnxModel.ir_version())); - LOG_INFO("Opset version: " << opset_version); - LOG_INFO("Producer name: " << onnxModel.producer_name()); - LOG_INFO("Producer version: " << onnxModel.producer_version()); - LOG_INFO("Domain: " << onnxModel.domain()); - LOG_INFO("Model version: " << onnxModel.model_version()); - LOG_INFO("Doc string: " << onnxModel.doc_string()); - LOG_INFO("----------------------------------------------------------------"); + // Keep track of the absolute path to the ONNX file. + mImporterCtx.setOnnxFileLocation(onnxModelFile); + + int64_t const opset_version = (onnxModel.opset_import().size() ? 
onnxModel.opset_import(0).version() : 0); + LOG_INFO("----------------------------------------------------------------"); + LOG_INFO("Input filename: " << onnxModelFile); + LOG_INFO("ONNX IR version: " << onnxIRVersionAsString(onnxModel.ir_version())); + LOG_INFO("Opset version: " << opset_version); + LOG_INFO("Producer name: " << onnxModel.producer_name()); + LOG_INFO("Producer version: " << onnxModel.producer_version()); + LOG_INFO("Domain: " << onnxModel.domain()); + LOG_INFO("Model version: " << onnxModel.model_version()); + LOG_INFO("Doc string: " << onnxModel.doc_string()); + LOG_INFO("----------------------------------------------------------------"); + + // Set currentNode count to -1 + mCurrentNode = -1; - // Set currentNode count to -1 - mCurrentNode = -1; - Status status = this->importModel(onnxModel); - if (status.is_error()) - { - mErrors.push_back(status); - } + // Prevent failure of importModel from early-exiting + try + { + this->importModel(onnxModel); + } + catch (OnnxTrtException& e) + { + mErrors.push_back(e.getStatus()); + } + catch (std::exception& e) + { + mErrors.push_back(MAKE_ERROR(e.what(), ErrorCode::kINTERNAL_ERROR)); + } - int32_t const numErrors = getNbErrors(); - for (int32_t i = 0; i < numErrors; ++i) - { - nvonnxparser::IParserError const* error = getError(i); - if (error->node() != -1) + int32_t const numErrors = getNbErrors(); + for (int32_t i = 0; i < numErrors; ++i) { - ::ONNX_NAMESPACE::NodeProto const& node = onnxModel.graph().node(error->node()); - LOG_ERROR("While parsing node number " << error->node() << " [" << node.op_type() << " -> \"" - << node.output(0) << "\"" << "]:"); - LOG_ERROR("--- Begin node ---" << "\n" << node); - LOG_ERROR("--- End node ---"); + nvonnxparser::IParserError const* error = getError(i); + if (error->node() != -1) + { + ::ONNX_NAMESPACE::NodeProto const& node = onnxModel.graph().node(error->node()); + LOG_ERROR("While parsing node number " << error->node() << " [" << node.op_type() << " -> \"" + << node.output(0) << "\"" + << "]:"); + LOG_ERROR("--- Begin node ---" << "\n" << node); + LOG_ERROR("--- End node ---"); + } + LOG_ERROR("ERROR: " << error->file() << ":" << error->line() << " In function " << error->func() << ":\n" + << "[" << static_cast(error->code()) << "] " << error->desc()); } - LOG_ERROR("ERROR: " << error->file() << ":" << error->line() << " In function " << error->func() << ":\n" - << "[" << static_cast(error->code()) << "] " << error->desc()); + return numErrors == 0; } - return numErrors == 0; + ONNXTRT_CATCH_RECORD + return false; } char const* const* ModelImporter::getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept diff --git a/ModelImporter.hpp b/ModelImporter.hpp index 7a64d7d..ae194cb 100644 --- a/ModelImporter.hpp +++ b/ModelImporter.hpp @@ -15,13 +15,13 @@ namespace onnx2trt { -Status parseNode(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +void parseNode(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, bool deserializingINetwork = false); void parseNodeStaticCheck( ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& errors, size_t const nodeIndex); -Status parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& graph, std::vector& errors, +void parseGraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& graph, std::vector& errors, bool deserializingINetwork = false, int32_t* currentNode = nullptr); class ModelImporter : public nvonnxparser::IParser @@ -31,7 +31,7 
@@ class ModelImporter : public nvonnxparser::IParser protected: StringMap _op_importers; - virtual Status importModel(::ONNX_NAMESPACE::ModelProto const& model) noexcept; + virtual void importModel(::ONNX_NAMESPACE::ModelProto const& model); private: ImporterContext mImporterCtx; diff --git a/ModelRefitter.cpp b/ModelRefitter.cpp index a053786..7a3bcec 100644 --- a/ModelRefitter.cpp +++ b/ModelRefitter.cpp @@ -28,14 +28,14 @@ void deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::Model #endif struct stat sb; - ONNXTRT_CHECK(!(stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)), - MAKE_ERROR( - "Failed to parse the ONNX model; input is not a regular file.", ErrorCode::kMODEL_DESERIALIZE_FAILED)); + ONNXTRT_CHECK(!(stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)) + && "Failed to parse the ONNX model; input is not a regular file.", + ErrorCode::kMODEL_DESERIALIZE_FAILED); GOOGLE_PROTOBUF_VERIFY_VERSION; bool const fileLoadSuccess = ParseFromFileAsBinary(&onnx_model, onnxModelFile); - ONNXTRT_CHECK(fileLoadSuccess, MAKE_ERROR("Failed to parse the ONNX model!", ErrorCode::kMODEL_DESERIALIZE_FAILED)); + ONNXTRT_CHECK(fileLoadSuccess && "Failed to parse the ONNX model!", ErrorCode::kMODEL_DESERIALIZE_FAILED); } } // anonymous namespace @@ -87,8 +87,7 @@ size_t ModelRefitter::batchnormWeightRefitter( // Validate that all the weights have the same amount of values bool allSame = scale.count() == bias.count() && mean.count() == scale.count() && variance.count() == scale.count() && combinedScale.count() == scale.count() && combinedBias.count() == scale.count(); - ONNXTRT_CHECK( - allSame, MAKE_ERROR("Inputs to BatchNormalization must have the same shape!", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK(allSame && "Inputs to BatchNormalization must have the same shape!", ErrorCode::kREFIT_FAILED); for (int32_t i = 0; i < nbChannels; ++i) { @@ -99,15 +98,17 @@ size_t ModelRefitter::batchnormWeightRefitter( if (refittableWeights.count(combinedScale.name)) { refittableWeights.erase(combinedScale.name); - ONNXTRT_CHECK(mRefitter->setNamedWeights(combinedScale.name, std::move(combinedScale)), - MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK( + mRefitter->setNamedWeights(combinedScale.name, std::move(combinedScale)) && "Failed to set named weights", + ErrorCode::kREFIT_FAILED); ++successfullyRefittedWeights; } if (refittableWeights.count(combinedBias.name)) { refittableWeights.erase(combinedBias.name); - ONNXTRT_CHECK(mRefitter->setNamedWeights(combinedBias.name, std::move(combinedBias)), - MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK( + mRefitter->setNamedWeights(combinedBias.name, std::move(combinedBias)) && "Failed to set named weights", + ErrorCode::kREFIT_FAILED); ++successfullyRefittedWeights; } return successfullyRefittedWeights; @@ -130,8 +131,8 @@ void ModelRefitter::refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_mo successfullyRefittedWeights = 0; size_t const numberOfWeightsToRefit = refittableWeights.size(); refitOnnxGraph(onnx_model.graph()); - ONNXTRT_CHECK(successfullyRefittedWeights == numberOfWeightsToRefit, - MAKE_ERROR("Failed to refit all the weights.", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK(successfullyRefittedWeights == numberOfWeightsToRefit && "Failed to refit all the weights.", + ErrorCode::kREFIT_FAILED); } void ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) @@ -157,16 +158,18 @@ void 
ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) refittedWeights.insert(initializer.name()); } ShapedWeights weights; - ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights, /*ownAllWeights=*/true), - MAKE_ERROR("Failed to import initializer.", ErrorCode::kUNSUPPORTED_NODE)); - ONNXTRT_CHECK(mRefitter->setNamedWeights(initializer.name().c_str(), std::move(weights)), - MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights, /*ownAllWeights=*/true) + && "Failed to import initializer.", + ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK( + mRefitter->setNamedWeights(initializer.name().c_str(), std::move(weights)) && "Failed to set named weights", + ErrorCode::kREFIT_FAILED); ++successfullyRefittedWeights; } std::vector topoOrder; - ONNXTRT_CHECK(toposort(graph.node(), &topoOrder), - MAKE_ERROR("Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH)); + ONNXTRT_CHECK( + toposort(graph.node(), &topoOrder) && "Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH); for (auto const& nodeIdx : topoOrder) { @@ -181,9 +184,9 @@ void ModelRefitter::refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONN // ensure that the recursion depth is limited to a set amount. ++nestedDepth; static size_t const MAX_NESTED_SUBGRAPHS = 24; - ONNXTRT_CHECK((nestedDepth <= MAX_NESTED_SUBGRAPHS), - MAKE_ERROR("ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", - ErrorCode::kUNSUPPORTED_GRAPH)); + ONNXTRT_CHECK((nestedDepth <= MAX_NESTED_SUBGRAPHS) + && "ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", + ErrorCode::kUNSUPPORTED_GRAPH); if (node.op_type() == "Constant") { @@ -231,8 +234,7 @@ void ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& nod { weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {}}); float value = nodeAttribute.f(); - ONNXTRT_CHECK( - weights.count() == 1, MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK(weights.count() == 1 && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); std::memcpy(weights.values, &value, sizeof(float)); } else if (nodeAttribute.name() == "value_floats") @@ -240,16 +242,15 @@ void ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& nod std::vector values{nodeAttribute.floats().begin(), nodeAttribute.floats().end()}; int64_t valueSize = values.size(); weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, {1, {valueSize}}); - ONNXTRT_CHECK(weights.count() == values.size(), - MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK( + weights.count() == values.size() && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); std::memcpy(weights.values, values.data(), weights.count() * sizeof(float)); } else if (nodeAttribute.name() == "value_int") { weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::INT64, {0, {}}); int64_t value = nodeAttribute.i(); - ONNXTRT_CHECK( - weights.count() == 1, MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK(weights.count() == 1 && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); std::memcpy(weights.values, &value, sizeof(int64_t)); } else if (nodeAttribute.name() == "value_ints") @@ -257,26 +258,28 @@ void 
ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& nod std::vector values{nodeAttribute.ints().begin(), nodeAttribute.ints().end()}; int64_t valueSize = values.size(); weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::INT64, {1, {valueSize}}); - ONNXTRT_CHECK(weights.count() == values.size(), - MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK( + weights.count() == values.size() && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); std::memcpy(weights.values, values.data(), weights.count() * sizeof(int64_t)); } else { ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = nodeAttribute.t(); - ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(onnx_weights_tensor, &weights), - MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK( + mWeightsContext.convertOnnxWeights(onnx_weights_tensor, &weights) && "Failed to import Constant node.", + ErrorCode::kUNSUPPORTED_NODE); } - ONNXTRT_CHECK(mRefitter->setNamedWeights(node.output(0).c_str(), std::move(weights)), - MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK( + mRefitter->setNamedWeights(node.output(0).c_str(), std::move(weights)) && "Failed to set named weights", + ErrorCode::kREFIT_FAILED); ++successfullyRefittedWeights; } void ModelRefitter::refitOnnxBatchNormNode( ::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) { - ONNXTRT_CHECK(node.input().size() == 5, - MAKE_ERROR("BatchNorm node does not have five required inputs.", ErrorCode::kINVALID_NODE)); + ONNXTRT_CHECK( + node.input().size() == 5 && "BatchNorm node does not have five required inputs.", ErrorCode::kINVALID_NODE); std::vector batchNormInputs; // The following looping construct is due to the fact that some tensors // might be shared among the BatchNorm's inputs @@ -288,8 +291,9 @@ void ModelRefitter::refitOnnxBatchNormNode( if (inputNames.at(inputIdx) == initializer.name()) { ShapedWeights weights; - ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights), - MAKE_ERROR("Failed to import initializer.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK( + mWeightsContext.convertOnnxWeights(initializer, &weights) && "Failed to import initializer.", + ErrorCode::kUNSUPPORTED_NODE); weights.name = initializer.name().c_str(); batchNormInputs.push_back(std::move(weights)); break; @@ -347,9 +351,9 @@ void ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) } // Number of outputs are the same between the two branches. - ONNXTRT_CHECK(thenGraphOutputSize == elseGraphOutputSize, - MAKE_ERROR( - "then/else subgraphs within the IF node should have the same number of outputs", ErrorCode::kREFIT_FAILED)); + ONNXTRT_CHECK(thenGraphOutputSize == elseGraphOutputSize + && "then/else subgraphs within the IF node should have the same number of outputs", + ErrorCode::kREFIT_FAILED); } void ModelRefitter::refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node) diff --git a/README.md b/README.md index ed1474f..89a30f2 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia. ## Supported TensorRT Versions -Development on the this branch is for the latest version of [TensorRT 10.4](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. 
+Development on this branch is for the latest version of [TensorRT 10.5](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

 For previous versions of TensorRT, refer to their respective branches.
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies

 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 10.4](https://developer.nvidia.com/tensorrt)
- - [TensorRT 10.4 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 10.5](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 10.5 open source libraries](https://github.com/NVIDIA/TensorRT/)

 ### Building

@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options

 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.

-TensorRT 10.4 supports ONNX release 1.16.0. Install it with:
+TensorRT 10.5 supports ONNX release 1.16.0. Install it with:

     python3 -m pip install onnx==1.16.0

diff --git a/ShapeTensor.cpp b/ShapeTensor.cpp
index ffc6bc7..1e47cf8 100644
--- a/ShapeTensor.cpp
+++ b/ShapeTensor.cpp
@@ -67,11 +67,8 @@ ShapeTensor::ShapeTensor(ImporterContext* ctx, TensorOrWeights& t)
 assert(d.nbDims <= 1 && "shape tensor must be 0D or 1D");
 mRank = d.nbDims;
 mSize = d.nbDims == 0 ? 1 : d.d[0];
- auto status = weightsToVector(weights, &mValues);
- if (status.code() != ErrorCode::kSUCCESS)
- {
- throw std::runtime_error("constant " + t.getName() + " is not a valid shape tensor");
- }
+
+ weightsToVector(weights, &mValues);
 mAllValuesKnown = true;
 }
 }
diff --git a/docs/Changelog.md b/docs/Changelog.md
index d390b37..f15cb41 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -2,6 +2,12 @@

 # ONNX-TensorRT Changelog

+# TensorRT 10.5 GA Release - 2024-10-1
+For more details, see the 10.5 GA release notes.
+
+- Added support for real-valued `STFT` operations
+- Improved error handling in `IParser`
+
 # TensorRT 10.4 GA Release - 2024-9-5
 For more details, see the 10.4 GA release notes.

diff --git a/docs/operators.md b/docs/operators.md
index 170fbb1..0a897b3 100644
--- a/docs/operators.md
+++ b/docs/operators.md
@@ -163,7 +163,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | RNN | Y | FP32, FP16, BF16| For bidirectional RNNs, activation functions must be the same for both the forward and reverse pass
 | RoiAlign | Y | FP32, FP16 |
 | Round | Y | FP32, FP16, BF16 |
-| STFT | N |
+| STFT | Y | FP32| `frame_step` and `window` must be initializers. Input must be real-valued.
| ScaledTanh | Y | FP32, FP16, BF16 | | Scan | Y | FP32, FP16, BF16| | Scatter | Y | FP32, FP16, BF16, INT32, INT64 | diff --git a/errorHelpers.hpp b/errorHelpers.hpp index 031a9f2..58bbd69 100644 --- a/errorHelpers.hpp +++ b/errorHelpers.hpp @@ -39,17 +39,19 @@ #define ONNXTRT_THROW(status) throw OnnxTrtException(status) -#define ONNXTRT_CHECK(cond, status) \ +#define ONNXTRT_CHECK(cond, code) \ if (!(cond)) \ { \ std::ostringstream ss; \ ss << "Assertion failed: " << #cond; \ - ONNXTRT_THROW(status); \ + ONNXTRT_THROW(MAKE_ERROR(ss.str(), (code))); \ } #define ONNXTRT_CHECK_NODE(cond, desc, node, nodeIdx, code) \ if (!(cond)) \ { \ + std::ostringstream ss; \ + ss << "Assertion failed: " << #cond << ": " << desc; \ ONNXTRT_THROW(MAKE_NODE_ERROR((ss.str()), (code), (node), (nodeIdx))); \ } diff --git a/importerUtils.cpp b/importerUtils.cpp index 1ec5b3e..e961473 100644 --- a/importerUtils.cpp +++ b/importerUtils.cpp @@ -4,8 +4,11 @@ #include "importerUtils.hpp" #include "OnnxAttrs.hpp" +#include "Status.hpp" #include "bfloat16.hpp" +#include "errorHelpers.hpp" #include +#include #include #include @@ -30,13 +33,21 @@ Status notInvalidType(TensorOrWeights const& input, std::vector con return Status::success(); } -NodeImportResult activationHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +void checkNotInvalidType(TensorOrWeights const& input, std::vector const& invalidTypes, + ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx) +{ + Status status = notInvalidType(input, invalidTypes, node, nodeIdx); + ONNXTRT_CHECK_NODE(status.is_success(), "Found unsupported input type of " << input.getType(), node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); +} + +NodeOutputs activationHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::ActivationType op, float* alpha, float* beta) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"INT32", "BOOL", "UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"INT32", "BOOL", "UINT8"}, node, nodeIdx); nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); nvinfer1::IActivationLayer* layer = N_CHECK(ctx->network()->addActivation(input, op)); - ASSERT_NODE(layer, "Failed to add activation layer!", node, nodeIdx, ErrorCode::kINTERNAL_ERROR); + ONNXTRT_CHECK_NODE(layer, "Failed to add activation layer!", node, nodeIdx, ErrorCode::kINTERNAL_ERROR); if (alpha) { layer->setAlpha(*alpha); @@ -54,7 +65,8 @@ nvinfer1::ITensor* addClip(ImporterContext* ctx, nvinfer1::ITensor* input, float { if (clip >= 0.f) { - nvinfer1::IActivationLayer* layer = N_CHECK(ctx->network()->addActivation(*input, nvinfer1::ActivationType::kCLIP)); + nvinfer1::IActivationLayer* layer + = N_CHECK(ctx->network()->addActivation(*input, nvinfer1::ActivationType::kCLIP)); layer->setAlpha(-clip); layer->setBeta(clip); return N_CHECK(layer->getOutput(0)); @@ -62,10 +74,10 @@ nvinfer1::ITensor* addClip(ImporterContext* ctx, nvinfer1::ITensor* input, float return input; }; -NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::TopKOperation op) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor* tensor = &convertToTensor(inputs.at(0), ctx); // Get attributes. 
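The reworked `ONNXTRT_CHECK` in `errorHelpers.hpp` above folds message construction into the macro itself: the failed condition is stringified with `#cond` and attached to the thrown error. A stand-alone sketch of the technique, with `std::runtime_error` in place of the parser's error machinery:

```cpp
#include <iostream>
#include <sstream>
#include <stdexcept>

// Sketch of a CHECK macro in the style of the patch: stringify the condition
// with #cond, build the message in a local ostringstream, then throw.
#define SKETCH_CHECK(cond)                                                     \
    if (!(cond))                                                               \
    {                                                                          \
        std::ostringstream ss;                                                 \
        ss << "Assertion failed: " << #cond;                                   \
        throw std::runtime_error(ss.str());                                    \
    }

int main()
{
    try
    {
        int const nbInputs = 3;
        SKETCH_CHECK(nbInputs == 5 && "node must have five inputs");
    }
    catch (std::exception const& e)
    {
        // Prints: Assertion failed: nbInputs == 5 && "node must have five inputs"
        std::cerr << e.what() << "\n";
    }
    return 0;
}
```

This is also why call sites throughout the patch write `cond && "message"`: the string literal survives stringification, so the human-readable message lands in the error text for free.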
@@ -76,7 +88,7 @@ NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::N // Insert a TopK layer with k set to 1. int32_t nbDims = tensor->getDimensions().nbDims; - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); uint32_t axisMask = 1 << axis; nvinfer1::ITopKLayer* layer; @@ -138,9 +150,8 @@ NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::N auto const ones = constantOfShape(ctx, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx)); std::vector newInputs{tensorDimOnAxis, indices, ones}; - std::vector indicesUpdate; - GET_VALUE( - elementwiseHelper(ctx, node, nodeIdx, newInputs, nvinfer1::ElementWiseOperation::kSUB), &indicesUpdate); + std::vector indicesUpdate + = elementwiseHelper(ctx, node, nodeIdx, newInputs, nvinfer1::ElementWiseOperation::kSUB); indices = &convertToTensor(indicesUpdate.at(0), ctx); } // The default behavior of the TopK layer is to keepdims. @@ -155,13 +166,14 @@ NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::N return {{indices}}; } -Status broadcastTensor(ImporterContext* ctx, nvinfer1::ITensor*& t, int const nbDims) +void broadcastTensor(ImporterContext* ctx, nvinfer1::ITensor*& t, int const nbDims) { - ASSERT(ctx->getOpsetVersion() >= 7 && "Pre-opset 7 broadcasting is unsupported in this version of the ONNX parser", + ONNXTRT_CHECK( + ctx->getOpsetVersion() >= 7 && "Pre-opset 7 broadcasting is unsupported in this version of the ONNX parser", ErrorCode::kUNSUPPORTED_NODE); auto const inputDims = shapeOf(*t); int const nbInputDims = inputDims.size(); - ASSERT((nbInputDims <= nbDims) && "Cannot broadcast a higher rank tensor to a lower rank tensor.", + ONNXTRT_CHECK((nbInputDims <= nbDims) && "Cannot broadcast a higher rank tensor to a lower rank tensor.", ErrorCode::kUNSUPPORTED_NODE); if (nbInputDims < nbDims) { @@ -170,17 +182,16 @@ Status broadcastTensor(ImporterContext* ctx, nvinfer1::ITensor*& t, int const nb ctx->registerLayer(reshape, "ONNXTRT_Broadcast", nullptr); t = N_CHECK(reshape->getOutput(0)); } - return Status::success(); } -Status broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2) +void broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2) { int const t1Dims = t1->getDimensions().nbDims; int const t2Dims = t2->getDimensions().nbDims; if (t1Dims == t2Dims) { - return Status::success(); + return; } if (t1Dims > t2Dims) @@ -190,13 +201,12 @@ Status broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1:: return broadcastTensor(ctx, t1, t2Dims); } -Status broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2, nvinfer1::ITensor*& t3) +void broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2, nvinfer1::ITensor*& t3) { int const maxDims = std::max({t1->getDimensions().nbDims, t2->getDimensions().nbDims, t3->getDimensions().nbDims}); - CHECK_STATUS(broadcastTensor(ctx, t1, maxDims)); - CHECK_STATUS(broadcastTensor(ctx, t2, maxDims)); - CHECK_STATUS(broadcastTensor(ctx, t3, maxDims)); - return Status::success(); + broadcastTensor(ctx, t1, maxDims); + broadcastTensor(ctx, t2, maxDims); + broadcastTensor(ctx, t3, maxDims); } // Helper functions for calculateBias: @@ -295,7 +305,7 @@ nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* cons return N_CHECK(l->getOutput(0)); } -Status convertAxis(int32_t& axis, int32_t const nbDims, 
::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx) +void convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx) { // Support negative indexing if (axis < 0) @@ -303,10 +313,9 @@ Status convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodePr axis += nbDims; } // Support nbDims as a valid axis for QuantDequantLinearHelper - ASSERT_NODE((axis >= 0 && axis <= nbDims), + ONNXTRT_CHECK_NODE((axis >= 0 && axis <= nbDims), "Axis must be in the range [0, nbDims (" << nbDims << ")]. Provided axis is: " << axis, node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - return Status::success(); } bool convertDtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype) @@ -362,10 +371,12 @@ bool convertOnnxPadding(ImporterContext* ctx, int32_t nbInputDims, std::vectorgetOutput(0)); - - auto* totalPaddingLayer = N_CHECK(addConstant(ctx, totalPadding, ::ONNX_NAMESPACE::TensorProto::INT64, nvinfer1::Dims{1, {nbInputDims}})); + + auto* totalPaddingLayer = N_CHECK( + addConstant(ctx, totalPadding, ::ONNX_NAMESPACE::TensorProto::INT64, nvinfer1::Dims{1, {nbInputDims}})); totalPaddingTensor = N_CHECK(totalPaddingLayer->getOutput(0)); return startTensor && totalPaddingTensor; } @@ -510,7 +521,7 @@ std::string getElementWiseOpName(nvinfer1::ElementWiseOperation op) } } -Status elementwiseCheck(std::vector const& inputs, const nvinfer1::ElementWiseOperation op, +void elementwiseCheck(std::vector const& inputs, const nvinfer1::ElementWiseOperation op, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx) { switch (op) @@ -519,7 +530,7 @@ Status elementwiseCheck(std::vector const& inputs, const nvinfe case nvinfer1::ElementWiseOperation::kAND: case nvinfer1::ElementWiseOperation::kOR: case nvinfer1::ElementWiseOperation::kXOR: - ASSERT_NODE( + ONNXTRT_CHECK_NODE( std::all_of(inputs.begin(), inputs.end(), [](TensorOrWeights const& input) { return input.isBool(); }), "Elementwise layer only supports operator " + getElementWiseOpName(op) + " and the given inputs with type BOOL.", @@ -535,7 +546,7 @@ Status elementwiseCheck(std::vector const& inputs, const nvinfe case nvinfer1::ElementWiseOperation::kPROD: case nvinfer1::ElementWiseOperation::kSUB: case nvinfer1::ElementWiseOperation::kSUM: - ASSERT_NODE( + ONNXTRT_CHECK_NODE( !std::any_of(inputs.begin(), inputs.end(), [](TensorOrWeights const& input) { return input.isBool(); }), "Elementwise layer does not support operator " + getElementWiseOpName(op) + " and the given inputs with type BOOL.", @@ -543,21 +554,21 @@ Status elementwiseCheck(std::vector const& inputs, const nvinfe break; // Pow does not support bool or integer types case nvinfer1::ElementWiseOperation::kPOW: - ASSERT_NODE(!std::any_of(inputs.begin(), inputs.end(), - [](TensorOrWeights const& input) { return input.isBool() || input.isInt32() || input.isInt64(); }), - "Elementwise layer does not support operator POW with boolean or integer types.", node, - nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE( + !std::any_of(inputs.begin(), inputs.end(), + [](TensorOrWeights const& input) { return input.isBool() || input.isInt32() || input.isInt64(); }), + "Elementwise layer does not support operator POW with boolean or integer types.", node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); break; // Equal supports all types. 
case nvinfer1::ElementWiseOperation::kEQUAL: break; } - return Status::success(); } -NodeImportResult elementwiseHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +NodeOutputs elementwiseHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector const& inputs, nvinfer1::ElementWiseOperation binary_op) { - ASSERT_NODE((!inputs.empty()), "Inputs vector is empty.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((!inputs.empty()), "Inputs vector is empty.", node, nodeIdx, ErrorCode::kINVALID_NODE); std::vector inputTensors; int maxNbDims = -1; @@ -571,14 +582,14 @@ NodeImportResult elementwiseHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeP auto* tensor_ptr = &convertToTensor(input, ctx); // Broadcast all input tensors to size of maxNbDims - CHECK_STATUS(broadcastTensor(ctx, tensor_ptr, maxNbDims)); - ASSERT_NODE(tensor_ptr->getDimensions().nbDims == maxNbDims, + broadcastTensor(ctx, tensor_ptr, maxNbDims); + ONNXTRT_CHECK_NODE(tensor_ptr->getDimensions().nbDims == maxNbDims, "The number of dimensions should remain the same adding inputs: " << tensor_ptr->getDimensions().nbDims << " != " << maxNbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); inputTensors.push_back(tensor_ptr); } - CHECK_STATUS(elementwiseCheck(inputs, binary_op, node, nodeIdx)); + elementwiseCheck(inputs, binary_op, node, nodeIdx); // Use the first tensor input as the base for the elementwise operation nvinfer1::ITensor* combined = inputTensors.at(0); @@ -590,7 +601,7 @@ NodeImportResult elementwiseHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeP for (size_t i = 1; i < inputTensors.size(); ++i) { nvinfer1::ITensor* tensor = inputTensors.at(i); - ASSERT_NODE((tensor->getDimensions().nbDims == combined->getDimensions().nbDims), + ONNXTRT_CHECK_NODE((tensor->getDimensions().nbDims == combined->getDimensions().nbDims), "The number of dimensions should remain the same adding inputs: " << tensor->getDimensions().nbDims << " != " << combined->getDimensions().nbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -609,7 +620,8 @@ nvinfer1::ITensor* flattenTensor( auto const d1 = product(ctx, dims, axis, dims.size(), 1); // ShuffleLayer here interprets dim extent 0 as empty dim to support empty tensor - nvinfer1::IShuffleLayer* flattenLayer = N_CHECK(addShuffle(ctx, tensor, concat(ctx, d0, d1), /*zeroIsPlaceholder=*/false)); + nvinfer1::IShuffleLayer* flattenLayer + = N_CHECK(addShuffle(ctx, tensor, concat(ctx, d0, d1), /*zeroIsPlaceholder=*/false)); if (regLayer) { ctx->registerLayer(flattenLayer, node); @@ -729,7 +741,8 @@ nvinfer1::ITensor* getAxisLengthInt64( return extractDimension(ctx, inpShape, axis, shape); } -nvinfer1::ITensor* getElementWiseResult(ImporterContext* ctx, nvinfer1::ITensor& lhs, nvinfer1::ITensor& rhs, nvinfer1::ElementWiseOperation op) +nvinfer1::ITensor* getElementWiseResult( + ImporterContext* ctx, nvinfer1::ITensor& lhs, nvinfer1::ITensor& rhs, nvinfer1::ElementWiseOperation op) { auto* elemLayer = N_CHECK(ctx->network()->addElementWise(lhs, rhs, op)); return N_CHECK(elemLayer->getOutput(0)); @@ -741,10 +754,9 @@ nvinfer1::ITensor* getUnaryResult(ImporterContext* ctx, nvinfer1::ITensor& input return N_CHECK(unaryLayer->getOutput(0)); } -Status getKernelParams(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::Dims* kernelSize, - nvinfer1::Dims* strides, nvinfer1::Dims* begPadding, nvinfer1::Dims* endPadding, - nvinfer1::PaddingMode& paddingMode, bool& 
countExcludePadding, nvinfer1::Dims* dilations, - nvinfer1::Dims* outputPadding, bool const poolingCeilMode) +void getKernelParams(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::Dims* kernelSize, + nvinfer1::Dims* strides, nvinfer1::Dims* begPadding, nvinfer1::Dims* endPadding, nvinfer1::PaddingMode& paddingMode, + bool& countExcludePadding, nvinfer1::Dims* dilations, nvinfer1::Dims* outputPadding, bool const poolingCeilMode) { int32_t const nbSpatialDims = kernelSize->nbDims; OnnxAttrs attrs(node, ctx); @@ -808,7 +820,7 @@ Status getKernelParams(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& { // If auto_pad is SAME_LOWER or SAME_UPPER, input padding should be calculated // "pads" attribute should not be specified - ASSERT(!attrs.count("pads") + ONNXTRT_CHECK(!attrs.count("pads") && "Pads attribute should not be specified with SAME_LOWER or SAME_UPPER auto padding!", ErrorCode::kINVALID_NODE); // Note: ONNX is always NCHW ordering @@ -822,10 +834,9 @@ Status getKernelParams(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& } else { - ASSERT(false && "invalid autopad attribute was set!", ErrorCode::kINVALID_NODE); + ONNXTRT_THROW(MAKE_ERROR("invalid autopad attribute was set!", ErrorCode::kINVALID_NODE)); } } - return Status::success(); } float getSingleValueAsFloat(ShapedWeights const& weights) @@ -854,16 +865,16 @@ nvinfer1::ITensor* globalPoolingHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::N return N_CHECK(layer->getOutput(0)); } -NodeImportResult greaterLessOrEqual(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, - size_t const nodeIdx, nvinfer1::ITensor* inputA, nvinfer1::ITensor* inputB, bool greater) +NodeOutputs greaterLessOrEqual(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, + nvinfer1::ITensor* inputA, nvinfer1::ITensor* inputB, bool greater) { nvinfer1::ElementWiseOperation op = greater ? 
nvinfer1::ElementWiseOperation::kGREATER : nvinfer1::ElementWiseOperation::kLESS;
-    std::vector<TensorOrWeights> firstOpResults;
-    GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {inputA, inputB}, op), &firstOpResults);
-    std::vector<TensorOrWeights> equalsResult;
-    GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {inputA, inputB}, nvinfer1::ElementWiseOperation::kEQUAL), &equalsResult);
-    return elementwiseHelper(ctx, node, nodeIdx, {firstOpResults.at(0), equalsResult.at(0)}, nvinfer1::ElementWiseOperation::kOR);
+    std::vector<TensorOrWeights> firstOpResults = elementwiseHelper(ctx, node, nodeIdx, {inputA, inputB}, op);
+    std::vector<TensorOrWeights> equalsResult
+        = elementwiseHelper(ctx, node, nodeIdx, {inputA, inputB}, nvinfer1::ElementWiseOperation::kEQUAL);
+    return elementwiseHelper(
+        ctx, node, nodeIdx, {firstOpResults.at(0), equalsResult.at(0)}, nvinfer1::ElementWiseOperation::kOR);
 }
 
 nvinfer1::IPluginCreatorInterface* importPluginCreator(ImporterContext* ctx, std::string const& pluginName,
@@ -912,8 +923,8 @@ std::unique_ptr<nvinfer1::IPluginV3> createPlugin(std::string const& name, nvinf
-NodeImportResult staticSliceImporter(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
-    size_t const nodeIdx, std::vector<TensorOrWeights>& inputs, nvinfer1::ITensor& data)
+NodeOutputs staticSliceImporter(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx,
+    std::vector<TensorOrWeights>& inputs, nvinfer1::ITensor& data)
 {
     auto const nbInputs = inputs.size();
@@ -975,10 +986,10 @@ NodeImportResult staticSliceImporter(ImporterContext* ctx, ::ONNX_NAMESPACE::Nod
     // Perform ceil integer division of (ends - starts) / steps to compute sizes.
     // Note ceil(x/y) = (x+y-1) / y for positive x & y, and ceil(x/y) = (x+y+1)/y for negative x & y
     // Negative sizes indicate an empty slice, so clamp to 0
-    sizes.d[axesIndex]
-        = std::max((modifiedEnds - starts.d[axesIndex] + steps.d[axesIndex] - (steps.d[axesIndex] > 0 ? 1 : -1))
-            / steps.d[axesIndex],
-            0);
+    sizes.d[axesIndex] = std::max(
+        (modifiedEnds - starts.d[axesIndex] + steps.d[axesIndex] - (steps.d[axesIndex] > 0 ?
1 : -1)) + / steps.d[axesIndex], + 0); } auto* slice = N_CHECK(ctx->network()->addSlice(data, starts, sizes, steps)); @@ -992,15 +1003,15 @@ bool isDynamic(nvinfer1::Dims const& shape) return std::any_of(shape.d, shape.d + shape.nbDims, [](int dim) { return dim < 0; }); } -NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, +NodeOutputs modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector& inputs) { nvinfer1::ITensor* inputXPtr = &convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* weightPtr = &convertToTensor(inputs.at(1), ctx); nvinfer1::ITensor* offsetPtr = &convertToTensor(inputs.at(2), ctx); int32_t nbDims = inputXPtr->getDimensions().nbDims; - ASSERT_NODE(nbDims >= 3 && nbDims <= 4, "TensorRT only supports DeformConv on 3D, or 4D tensors!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(nbDims >= 3 && nbDims <= 4, "TensorRT only supports DeformConv on 3D, or 4D tensors!", node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); bool const needToExpandDims = (nbDims == 3); if (needToExpandDims) { @@ -1009,9 +1020,9 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN inputXPtr = unsqueezeTensor(ctx, node, *inputXPtr, axes); weightPtr = unsqueezeTensor(ctx, node, *weightPtr, axes); offsetPtr = unsqueezeTensor(ctx, node, *offsetPtr, axes); - ASSERT(inputXPtr && "Failed to unsqueeze the input tensor.", ErrorCode::kUNSUPPORTED_NODE); - ASSERT(weightPtr && "Failed to unsqueeze the weight tensor.", ErrorCode::kUNSUPPORTED_NODE); - ASSERT(offsetPtr && "Failed to unsqueeze the offset tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(inputXPtr && "Failed to unsqueeze the input tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(weightPtr && "Failed to unsqueeze the weight tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(offsetPtr && "Failed to unsqueeze the offset tensor.", ErrorCode::kUNSUPPORTED_NODE); } // Parse attributes @@ -1019,10 +1030,10 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN int32_t nbSpatialDims = nbDims - 2; if (attrs.count("kernel_shape")) { - ASSERT(nbSpatialDims == attrs.at("kernel_shape")->ints().size() + ONNXTRT_CHECK(nbSpatialDims == attrs.at("kernel_shape")->ints().size() && "The attribute kernel_shape misaligns with the shape of the weight tensor.", ErrorCode::kUNSUPPORTED_NODE); - ASSERT_NODE(((nbSpatialDims == 1 && needToExpandDims) || nbSpatialDims == 2), + ONNXTRT_CHECK_NODE(((nbSpatialDims == 1 && needToExpandDims) || nbSpatialDims == 2), "The attribute kernel_shape misaligns with the shape of the input tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -1056,7 +1067,7 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN { auto onnxPadding = attrs.get>("pads"); int32_t ndim = onnxPadding.size() / 2; - ASSERT(ndim == nbSpatialDims + ONNXTRT_CHECK(ndim == nbSpatialDims && "The given pads attribute mismatch with the spatial dimensions of the weight tensor.", ErrorCode::kUNSUPPORTED_NODE); for (int32_t i = 0; i < nbSpatialDims; ++i) @@ -1066,7 +1077,7 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN } } - ASSERT(begPadding == endPadding + ONNXTRT_CHECK(begPadding == endPadding && "TensorRT only support the pads attribute of the DeformConv operator where the same number of pixels are added to the beginning and the end of the corresponding axis.", 
ErrorCode::kUNSUPPORTED_NODE); nvinfer1::Dims strides = makeDims(nbSpatialDims, /*Default value of strides*/ 1); @@ -1107,8 +1118,8 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN auto const plugin = createPlugin( pluginName, static_cast(importPluginCreator(ctx, pluginName, pluginVersion)), f); - ASSERT_NODE(plugin != nullptr, "ModulatedDeformConv2d plugin was not found in the plugin registry!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(plugin != nullptr, "ModulatedDeformConv2d plugin was not found in the plugin registry!", node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* biasPtr = nullptr; nvinfer1::ITensor* maskPtr = nullptr; @@ -1124,7 +1135,7 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN // Expand spatial dims from 1D to 2D std::vector const axes{3}; maskPtr = unsqueezeTensor(ctx, node, *maskPtr, axes); - ASSERT(maskPtr && "Failed to unsqueeze the mask tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(maskPtr && "Failed to unsqueeze the mask tensor.", ErrorCode::kUNSUPPORTED_NODE); } } else @@ -1170,24 +1181,25 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN // Un-expand spatial dims back to 1D std::vector const axes{3}; outputPtr = squeezeTensor(ctx, node, *outputPtr, axes); - ASSERT_NODE(outputPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(outputPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } return {{outputPtr}}; } -NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, +NodeOutputs instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector& inputs) { // Scales and biases must be initializers - ASSERT_NODE(inputs.at(1).is_weights(), "The scale tensor is required to be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE(inputs.at(1).is_weights(), "The scale tensor is required to be an initializer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - ASSERT_NODE(inputs.at(2).is_weights(), "The bias tensor is required to be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE(inputs.at(2).is_weights(), "The bias tensor is required to be an initializer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); int32_t nbDims = tensorPtr->getDimensions().nbDims; - ASSERT_NODE(nbDims >= 3 && nbDims <= 5, "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!", - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(nbDims >= 3 && nbDims <= 5, + "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!", node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); bool const needToExpandDims = (nbDims == 3); if (needToExpandDims) @@ -1195,7 +1207,7 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE // Expand spatial dims from 1D to 2D std::vector const axes{3}; tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } auto scaleWeights = inputs.at(1).weights(); auto biasWeights = inputs.at(2).weights(); @@ -1223,8 +1235,8 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE auto const plugin = 
createPlugin(getNodeName(node), static_cast(importPluginCreator(ctx, pluginName, pluginVersion)), f); - ASSERT_NODE(plugin != nullptr, "InstanceNormalization plugin was not found in the plugin registry!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(plugin != nullptr, "InstanceNormalization plugin was not found in the plugin registry!", node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); auto* layer = N_CHECK(ctx->network()->addPluginV3(&tensorPtr, 1, nullptr, 0, *plugin)); ctx->registerLayer(layer, node); @@ -1235,7 +1247,7 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE // Un-expand spatial dims back to 1D std::vector const axes{3}; tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - ASSERT_NODE(tensorPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(tensorPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } return {{tensorPtr}}; @@ -1245,10 +1257,12 @@ nvinfer1::ITensor* iota(ImporterContext* ctx, ShapeTensor iotaDims, int32_t axis { std::vector deltaVals(iotaDims.size(), 0); deltaVals[axis] = 1; - auto* iota = N_CHECK(ctx->network()->addFill({0, {0}}, nvinfer1::FillOperation::kLINSPACE, nvinfer1::DataType::kINT32)); + auto* iota + = N_CHECK(ctx->network()->addFill({0, {0}}, nvinfer1::FillOperation::kLINSPACE, nvinfer1::DataType::kINT32)); auto* alphaLayer = N_CHECK(addConstantScalar(ctx, static_cast(0), ::ONNX_NAMESPACE::TensorProto::INT32)); auto* alpha = N_CHECK(alphaLayer->getOutput(0)); - auto* deltaLayer = N_CHECK(addConstant(ctx, deltaVals, ::ONNX_NAMESPACE::TensorProto::INT32, {1, {iotaDims.size()}})); + auto* deltaLayer + = N_CHECK(addConstant(ctx, deltaVals, ::ONNX_NAMESPACE::TensorProto::INT32, {1, {iotaDims.size()}})); auto* delta = N_CHECK(deltaLayer->getOutput(0)); iota->setInput(0, iotaDims.tensor(ctx)); iota->setInput(1, *alpha); @@ -1279,8 +1293,8 @@ nvinfer1::Dims makeDims(int nbDims, int val) return dims; } -NodeImportResult normalizationHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, - size_t const nodeIdx, std::vector& inputs) +NodeOutputs normalizationHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, + std::vector& inputs) { auto* input = &convertToTensor(inputs.at(0), ctx); auto* scale = &convertToTensor(inputs.at(1), ctx); @@ -1291,7 +1305,7 @@ NodeImportResult normalizationHelper(ImporterContext* ctx, const ::ONNX_NAMESPAC int32_t nbGroups = attrs.get("num_groups", 1); auto nbDims = input->getDimensions().nbDims; - ASSERT_NODE(nbDims >= 3, + ONNXTRT_CHECK_NODE(nbDims >= 3, "Input to normalization should be at least 3D, the actual number of dimensions is " << nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -1325,14 +1339,14 @@ NodeImportResult normalizationHelper(ImporterContext* ctx, const ::ONNX_NAMESPAC return {{output}}; } -Status normalizeAxes(ShapeTensor& axes, int32_t const rank) +void normalizeAxes(ShapeTensor& axes, int32_t const rank) { - ASSERT(axes.allValuesKnown() && "Axes should not contain unknown values.", ErrorCode::kINTERNAL_ERROR); + ONNXTRT_CHECK(axes.allValuesKnown() && "Axes should not contain unknown values.", ErrorCode::kINTERNAL_ERROR); std::vector newAxes; newAxes.reserve(axes.size()); for (int64_t axis : axes) { - ASSERT((-rank <= axis && axis < rank) && "Axis must be in the range of [-rank, rank-1].", + ONNXTRT_CHECK((-rank <= axis && axis < rank) && "Axis must be in the range of [-rank, rank-1].", 
ErrorCode::kINVALID_VALUE); // "Negative value means counting dimensions from the back." if (axis < 0) @@ -1342,7 +1356,6 @@ Status normalizeAxes(ShapeTensor& axes, int32_t const rank) newAxes.push_back(axis); } axes = ShapeTensor(1, std::move(newAxes)); - return Status::success(); } nvinfer1::Dims insertDimension(nvinfer1::Dims const& dims, int const axis, int const value) @@ -1381,7 +1394,7 @@ std::vector parseLSTMActivationValues(std::vector& inputs, nvinfer1::PoolingType type) { nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); @@ -1392,13 +1405,13 @@ NodeImportResult poolingHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto // Expand spatial dims from 1D to 2D std::vector axes{3}; tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); dims = tensorPtr->getDimensions(); } OnnxAttrs attrs(node, ctx); int nbSpatialDims = attrs.at("kernel_shape")->ints().size(); - ASSERT_NODE(((nbSpatialDims == 1 && needToExpandDims) || nbSpatialDims == 2 || nbSpatialDims == 3), + ONNXTRT_CHECK_NODE(((nbSpatialDims == 1 && needToExpandDims) || nbSpatialDims == 2 || nbSpatialDims == 3), "The attribute kernel_shape misaligns with the shape of the input tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -1417,8 +1430,8 @@ NodeImportResult poolingHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto ceilMode = static_cast(attrs.get("ceil_mode", 0)); } - CHECK_STATUS(getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, exclude_padding, - nullptr, nullptr, ceilMode)); + getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, exclude_padding, nullptr, + nullptr, ceilMode); if (needToExpandDims) { @@ -1455,11 +1468,11 @@ bool IsReduceNoOp( return (attrs.get("noop_with_empty_axes", 0) == 1) && (!attrs.count("axes")) && (inputs.size() == 1); } -NodeImportResult reduceTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +NodeOutputs reduceTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, TensorOrWeights input, nvinfer1::ReduceOperation operation, TensorOrWeights inputAxes) { // TensorRT does not support reduction on Bool or UINT8 tensors. - CHECK_STATUS(notInvalidType(input, {"BOOL", "UINT8"}, node, nodeIdx)); + checkNotInvalidType(input, {"BOOL", "UINT8"}, node, nodeIdx); OnnxAttrs attrs(node, ctx); nvinfer1::ITensor& tensor = convertToTensor(input, ctx); @@ -1472,9 +1485,9 @@ NodeImportResult reduceTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto } else if (!inputAxes.isNullTensor()) { - ASSERT_NODE( + ONNXTRT_CHECK_NODE( inputAxes.is_weights(), "Axis input must be an initializer!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - CHECK_STATUS(weightsToVector(inputAxes.weights(), &axes)); + weightsToVector(inputAxes.weights(), &axes); } // It's possible that the axes tensor, axes initializer, or axes attribute was empty. Handle such cases here. 
if (axes.empty())
@@ -1492,7 +1505,7 @@ NodeImportResult reduceTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto
     uint32_t axisMask = 0;
     for (int32_t axis : axes)
     {
-        CHECK_STATUS(convertAxis(axis, ndim, node, nodeIdx));
+        convertAxis(axis, ndim, node, nodeIdx);
         axisMask |= 1 << axis;
     }
 
@@ -1515,7 +1528,7 @@ nvinfer1::ITensor* reshapeTensor(ImporterContext* ctx, nvinfer1::ITensor& tensor
     return N_CHECK(layer->getOutput(0));
 }
 
-NodeImportResult scaleHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
+NodeOutputs scaleHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
     nvinfer1::ITensor& tensor_, nvinfer1::ScaleMode mode, nvinfer1::Weights const& shift,
     nvinfer1::Weights const& scale, nvinfer1::Weights const& power, char const* shiftName, char const* scaleName)
 {
@@ -1557,8 +1570,8 @@ NodeImportResult scaleHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeP
     return {{tensorPtr}};
 }
 
-void setAttr(
-    nvinfer1::Dims* trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int32_t nbSpatialDims, int32_t defaultVal)
+void setAttr(nvinfer1::Dims* trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int32_t nbSpatialDims,
+    int32_t defaultVal)
 {
     assert(trtAttr->nbDims == nbSpatialDims);
     int32_t ndim = onnxAttr->ints().size();
@@ -1692,7 +1705,7 @@ std::string getUnaryOpName(nvinfer1::UnaryOperation op)
     }
 }
 
-NodeImportResult unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
+NodeOutputs unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
     TensorOrWeights& input, nvinfer1::UnaryOperation op)
 {
     nvinfer1::ITensor* tensorPtr = &convertToTensor(input, ctx);
@@ -1720,9 +1733,10 @@ NodeImportResult unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeP
     if (inputType == nvinfer1::DataType::kINT32)
     {
         // Calculate the rank of the input, and set all sizes to one and rely on broadcasting
-        auto* zeroLayer = N_CHECK(addConstant(ctx, std::vector<int32_t>{0}, ::ONNX_NAMESPACE::TensorProto::INT32, {0, {1}}));
+        auto* zeroLayer
+            = N_CHECK(addConstant(ctx, std::vector<int32_t>{0}, ::ONNX_NAMESPACE::TensorProto::INT32, {0, {1}}));
         nvinfer1::ITensor* zeroTensor = N_CHECK(zeroLayer->getOutput(0));
-        CHECK_STATUS(broadcastTensors(ctx, zeroTensor, tensorPtr));
+        broadcastTensors(ctx, zeroTensor, tensorPtr);
         std::vector<TensorOrWeights> layerInputs = {zeroTensor, tensorPtr};
         return elementwiseHelper(ctx, node, nodeIdx, layerInputs, nvinfer1::ElementWiseOperation::kSUB);
     }
@@ -1737,7 +1751,7 @@ NodeImportResult unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeP
         }
     }
 
-    ASSERT_NODE(validUnaryType,
+    ONNXTRT_CHECK_NODE(validUnaryType,
         "This version of TensorRT does not support the given operator " + getUnaryOpName(op)
             + " with the given input data type " + getTrtDtypeName(inputType) + ".",
         node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE);
@@ -1749,10 +1763,10 @@ NodeImportResult unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeP
     return {{tensorPtr}};
 }
 
-NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
+NodeOutputs convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
     std::vector<TensorOrWeights>& inputs)
 {
-    ASSERT(inputs.size() >= 2 && "Convolution require at least 2 inputs.", ErrorCode::kUNSUPPORTED_NODE);
+    ONNXTRT_CHECK(inputs.size() >= 2 && "Convolution requires at least 2 inputs.", ErrorCode::kUNSUPPORTED_NODE);
     nvinfer1::ITensor* input =
&convertToTensor(inputs.at(0), ctx); nvinfer1::Dims dims = input->getDimensions(); bool needToExpandDims = (dims.nbDims == 3); @@ -1780,8 +1794,8 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No nvinfer1::Dims dilations = makeDims(nbSpatialDims, 1); nvinfer1::PaddingMode paddingMode; bool excludePadding{false}; - CHECK_STATUS(getKernelParams( - ctx, node, &kernelDims, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations)); + getKernelParams( + ctx, node, &kernelDims, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations); auto const nChannel = dims.d[1]; auto const K = inputs.at(1).shape().d[0]; auto const C = inputs.at(1).shape().d[1]; @@ -1809,9 +1823,9 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No // Expand spatial dims from 1D to 2D std::vector const axes{3}; kernelTensor = unsqueezeTensor(ctx, node, *kernelTensor, axes); - ASSERT(kernelTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(kernelTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } - ASSERT(checkSpatialDims(kernelTensor->getDimensions()) + ONNXTRT_CHECK(checkSpatialDims(kernelTensor->getDimensions()) && "The input tensor shape misaligns with the input kernel shape.", ErrorCode::kUNSUPPORTED_NODE); } @@ -1823,7 +1837,7 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No kernelWeights.shape.nbDims = 4; kernelWeights.shape.d[3] = 1; } - ASSERT_NODE(checkSpatialDims(kernelWeights.shape), + ONNXTRT_CHECK_NODE(checkSpatialDims(kernelWeights.shape), "The input tensor shape misaligns with the input kernel shape.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -1842,13 +1856,13 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No OnnxAttrs attrs(node, ctx); int32_t ngroup = attrs.get("group", 1); - ASSERT_NODE((nChannel == -1 || C * ngroup == nChannel), + ONNXTRT_CHECK_NODE((nChannel == -1 || C * ngroup == nChannel), "The attribute group and the kernel shape misalign with the channel size of the input tensor. ", node, nodeIdx, ErrorCode::kINVALID_NODE); nvinfer1::IConvolutionLayer* layer = N_CHECK(ctx->network()->addConvolutionNd(*input, K, kernelDims, kernelWeights, biasWeights)); - ASSERT_NODE(layer, "Failed to add the Convolution layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to add the Convolution layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setStrideNd(strides); layer->setPaddingMode(paddingMode); layer->setPrePadding(begPadding); @@ -1924,8 +1938,8 @@ nvinfer1::ITensor* resizeShapeTensor(ImporterContext* ctx, nvinfer1::ITensor& in auto* inputShapeTensor = N_CHECK(floatCast->getOutput(0)); auto& scaleTensor = convertToTensor(scales, ctx); - auto* prodLayer - = N_CHECK(ctx->network()->addElementWise(scaleTensor, *inputShapeTensor, nvinfer1::ElementWiseOperation::kPROD)); + auto* prodLayer = N_CHECK( + ctx->network()->addElementWise(scaleTensor, *inputShapeTensor, nvinfer1::ElementWiseOperation::kPROD)); ctx->registerLayer(prodLayer, "ONNXTRT_resizeShapeTensor_prod", nullptr); auto* prod = N_CHECK(prodLayer->getOutput(0)); @@ -2020,10 +2034,7 @@ nvinfer1::ITensor* addSoftmax( // "Negative value means counting dimensions from the back. // Accepted range is [-r, r-1] where r = rank(input)." 
auto const rank = shapeOf(input).size(); - if (convertAxis(axis, rank, node, nodeIdx).is_error()) - { - return nullptr; - } + convertAxis(axis, rank, node, nodeIdx); nvinfer1::ISoftMaxLayer* softMax{nullptr}; if (ctx->getOpsetVersion() >= 13) @@ -2043,10 +2054,10 @@ nvinfer1::ITensor* addSoftmax( return N_CHECK(softMax->getOutput(0)); } -NodeImportResult addScatterLayer(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +NodeOutputs addScatterLayer(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::ScatterMode mode, int32_t axis) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor& indices = convertToTensor(inputs.at(1), ctx); nvinfer1::ITensor& updates = convertToTensor(inputs.at(2), ctx); @@ -2059,7 +2070,7 @@ NodeImportResult addScatterLayer(ImporterContext* ctx, ::ONNX_NAMESPACE::NodePro auto const updatesDims = updates.getDimensions(); // Ranks must all be the same - ASSERT_NODE(dataDims.nbDims == indicesDims.nbDims && dataDims.nbDims == updatesDims.nbDims, + ONNXTRT_CHECK_NODE(dataDims.nbDims == indicesDims.nbDims && dataDims.nbDims == updatesDims.nbDims, "Input dimensions to ScatterElements must have the same rank! data rank = " << dataDims.nbDims << ", indices rank = " << indicesDims.nbDims << ", updates rank = " << updatesDims.nbDims << ".", @@ -2070,14 +2081,14 @@ NodeImportResult addScatterLayer(ImporterContext* ctx, ::ONNX_NAMESPACE::NodePro { if (indicesDims.d[i] != -1 && dataDims.d[i] != -1) { - ASSERT_NODE(indicesDims.d[i] <= dataDims.d[i], + ONNXTRT_CHECK_NODE(indicesDims.d[i] <= dataDims.d[i], "Indices dimensions must be less than data dimensions! indices dimension = " << indicesDims.d[i] << ", data dimension = " << dataDims.d[i] << " on index " << i << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } if (updatesDims.d[i] != -1 && dataDims.d[i] != -1) { - ASSERT_NODE(updatesDims.d[i] <= dataDims.d[i], + ONNXTRT_CHECK_NODE(updatesDims.d[i] <= dataDims.d[i], "Updates dimensions must be less than data dimensions! updates dimension = " << updatesDims.d[i] << ", data dimension = " << dataDims.d[i] << " on index " << i << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -2227,7 +2238,7 @@ std::string rebuildEinsumEquation( return equation; } -Status processEllipsisAndImplicitOutput( +void processEllipsisAndImplicitOutput( std::vector const& inputTensors, std::string& equation, bool const withEllipsis) { std::vector inputSubscriptsVec{}; @@ -2281,8 +2292,6 @@ Status processEllipsisAndImplicitOutput( //! Rebuild einsum equation. equation = rebuildEinsumEquation(inputSubscriptsVec, outputSubscripts); - - return Status::success(); } //! 
Infer hiddent output subscripts when transforming einsum layer with more than 2 inputs into multiple 2-input einsum @@ -2330,7 +2339,8 @@ nvinfer1::IEinsumLayer* parseGraphWithMoreInputs(ImporterContext* ctx, ::ONNX_NA std::string hiddenOutputSubscripts = inferHiddenOutputSubscripts(inputSubscripts); std::string hiddenEquation = rebuildEinsumEquation(inputSubscripts, hiddenOutputSubscripts); - nvinfer1::IEinsumLayer* einsumLayer = N_CHECK(ctx->network()->addEinsum(inputTensors.data(), 2, hiddenEquation.c_str())); + nvinfer1::IEinsumLayer* einsumLayer + = N_CHECK(ctx->network()->addEinsum(inputTensors.data(), 2, hiddenEquation.c_str())); ctx->registerLayer(einsumLayer, node); leftSubscripts = hiddenOutputSubscripts; @@ -2341,7 +2351,8 @@ nvinfer1::IEinsumLayer* parseGraphWithMoreInputs(ImporterContext* ctx, ::ONNX_NA std::vector finalInputTensors{leftInput, inputs[nbInputs - 1]}; std::string finalEquation = rebuildEinsumEquation({leftSubscripts, inputSubscriptsVec[nbInputs - 1]}, outputSubscripts); - nvinfer1::IEinsumLayer* einsumLayer = N_CHECK(ctx->network()->addEinsum(finalInputTensors.data(), 2, finalEquation.c_str())); + nvinfer1::IEinsumLayer* einsumLayer + = N_CHECK(ctx->network()->addEinsum(finalInputTensors.data(), 2, finalEquation.c_str())); ctx->registerLayer(einsumLayer, node); return einsumLayer; @@ -2387,4 +2398,4 @@ nvinfer1::ITensor* windowHelper(ImporterContext* ctx, float numerator, nvinfer1: return N_CHECK(trigOutput); } -} // namespace onnx2trt +} // namespace onnx2trt \ No newline at end of file diff --git a/importerUtils.hpp b/importerUtils.hpp index 73abe9c..4497f44 100644 --- a/importerUtils.hpp +++ b/importerUtils.hpp @@ -9,6 +9,7 @@ #include "ShapeTensor.hpp" #include "ShapedWeights.hpp" #include "Status.hpp" +#include "errorHelpers.hpp" #include "weightUtils.hpp" #include @@ -93,25 +94,25 @@ enum ScaleOp }; // Helper function to import ONNX activation nodes into TRT -NodeImportResult activationHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs activationHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::ActivationType op, float* alpha = nullptr, float* beta = nullptr); // Add clipping to a tensor if clip is a valid value. nvinfer1::ITensor* addClip(ImporterContext* ctx, nvinfer1::ITensor* input, float clip); // Helper function to import ArgMax and ArgMin nodes into TRT -NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::TopKOperation op); //! If t has rank less than nbDims, reshape it to have nbDims by prepending ones to its dimensions. //! Assert failure if t has rank greater than nbDims. 
-Status broadcastTensor(ImporterContext* ctx, nvinfer1::ITensor*& t, int const nbDims); +void broadcastTensor(ImporterContext* ctx, nvinfer1::ITensor*& t, int const nbDims); // Helper function to broadcast two tensors to the larger one's shape -Status broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2); +void broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2); // Helper function to broadcast three tensors to the largest one's shape -Status broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2, nvinfer1::ITensor*& t3); +void broadcastTensors(ImporterContext* ctx, nvinfer1::ITensor*& t1, nvinfer1::ITensor*& t2, nvinfer1::ITensor*& t3); // Helper function to calculate the bias tensor for GatherElements. std::vector calculateBias( @@ -130,7 +131,7 @@ nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nv nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape); // Helper function to convert an ONNX axis into a TRT axis -Status convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx); +void convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx); // Helper function to convert an ONNX datatype into a TRT datatype bool convertDtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype); @@ -149,7 +150,7 @@ onnx2trt::ShapedWeights createZeroShifts(onnx2trt::ShapedWeights const& shiftInt nvinfer1::ITensor* createZeroTensor(ImporterContext* ctx, nvinfer1::ITensor* data); // Helper function to convert multi input convolution -NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, std::vector& inputs); // Helper function to convert a 1D tensor into a scalar @@ -165,11 +166,11 @@ nvinfer1::ITensor* convertToScalar(TensorOrWeights& input, ImporterContext* ctx) int divCeil(int n, int d); // Helper function to check that the input data types for an elementwise operation are supported -Status elementwiseCheck(std::vector const& inputs, const nvinfer1::ElementWiseOperation op, +void elementwiseCheck(std::vector const& inputs, const nvinfer1::ElementWiseOperation op, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx); // Helper function to import an ONNX elementwise op into TRT -NodeImportResult elementwiseHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +NodeOutputs elementwiseHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector const& inputs, nvinfer1::ElementWiseOperation binary_op); // Helper function to flatten a tensor on a given axis @@ -178,7 +179,8 @@ nvinfer1::ITensor* flattenTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodePro // Slice out the specified dimension from a shape tensor. e.g. extractDimension(shape=(7, 6, 5), dim=2) would return 5. // shape specifies the shape of the returned Tensor. Must have a volume of 1. 
-nvinfer1::ITensor* extractDimension(ImporterContext* ctx, nvinfer1::ITensor* shapeTensor, int32_t dim, nvinfer1::Dims shape);
+nvinfer1::ITensor* extractDimension(
+    ImporterContext* ctx, nvinfer1::ITensor* shapeTensor, int32_t dim, nvinfer1::Dims shape);
 
 // Helper function to generate padding values for convTranspose
 void generatePadding(nvinfer1::Dims inputShape, nvinfer1::Dims outputShape, nvinfer1::Dims kernelSize,
@@ -203,10 +205,10 @@ nvinfer1::ITensor* getElementWiseResult(
 nvinfer1::ITensor* getUnaryResult(ImporterContext* ctx, nvinfer1::ITensor& input, nvinfer1::UnaryOperation op);
 
 // Helper function to get kernel attributes for various ONNX nodes
-Status getKernelParams(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::Dims* kernelSize,
-    nvinfer1::Dims* strides, nvinfer1::Dims* begPadding, nvinfer1::Dims* endPadding,
-    nvinfer1::PaddingMode& paddingMode, bool& countExcludePadding, nvinfer1::Dims* dilations = nullptr,
-    nvinfer1::Dims* outputPadding = nullptr, bool const poolingCeilMode = false);
+void getKernelParams(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::Dims* kernelSize,
+    nvinfer1::Dims* strides, nvinfer1::Dims* begPadding, nvinfer1::Dims* endPadding, nvinfer1::PaddingMode& paddingMode,
+    bool& countExcludePadding, nvinfer1::Dims* dilations = nullptr, nvinfer1::Dims* outputPadding = nullptr,
+    bool const poolingCeilMode = false);
 
 // Helper function to get the scaling mode for TRT's scale layer
 nvinfer1::ScaleMode getScaleMode(nvinfer1::Dims const& weights_shape, nvinfer1::Dims const& tensor_shape);
@@ -220,18 +222,18 @@ nvinfer1::ITensor* globalPoolingHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::N
 
 // Helper function to create a greaterOrEqual or lessOrEqual operation. Provide `greater=true` for greaterOrEqual,
 // `greater=false` for lessOrEqual
-NodeImportResult greaterLessOrEqual(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
-    size_t const nodeIdx, nvinfer1::ITensor* inputA, nvinfer1::ITensor* inputB, bool greater);
+NodeOutputs greaterLessOrEqual(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
+    nvinfer1::ITensor* inputA, nvinfer1::ITensor* inputB, bool greater);
 
 // Helper function to determine if a shape contains dynamic dimensions
 bool isDynamic(nvinfer1::Dims const& shape);
 
 // Helper function to use modulatedDeformableConv2D plugin
-NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
+NodeOutputs modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
     size_t const nodeIdx, std::vector<TensorOrWeights>& inputs);
 
 // Helper function to use optimized 3D instanceNorm plugin
-NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
+NodeOutputs instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
     size_t const nodeIdx, std::vector<TensorOrWeights>& inputs);
 
 // Helper function to create an iota fill given a set of dimensions and an axis
@@ -256,19 +258,19 @@ TensorOrWeights identity(ImporterContext* ctx, TensorOrWeights input);
 nvinfer1::Dims makeDims(int nbDims, int val);
 
 // Helper function to create normalization layers for GroupNorm and InstanceNorm
-NodeImportResult normalizationHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
-    size_t const nodeIdx, std::vector<TensorOrWeights>& inputs);
+NodeOutputs normalizationHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx,
std::vector& inputs); // Given a list of axes in the range of [-rank, rank-1], where rank is the rank // of the corresponding data tensor, normalize to [0, rank-1]. -Status normalizeAxes(ShapeTensor& axes, int32_t const rank); +void normalizeAxes(ShapeTensor& axes, int32_t const rank); // Helper function to parse activation values for LSTM nodes std::vector parseLSTMActivationValues(std::vector const& activationTypes, std::vector const& activationValues, bool isAlpha); // Helper function to map various ONNX pooling ops into TensorRT. -NodeImportResult poolingHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +NodeOutputs poolingHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::PoolingType type); // Helper function to check if reduce op equals No-op @@ -276,20 +278,20 @@ bool IsReduceNoOp( ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector const& inputs); // Helper function to import reduce ops into TRT -NodeImportResult reduceTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, +NodeOutputs reduceTensor(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, TensorOrWeights input, nvinfer1::ReduceOperation operation, TensorOrWeights inputAxes = TensorOrWeights()); // Helper function to shape a Tensor given a new shape nvinfer1::ITensor* reshapeTensor(ImporterContext* ctx, nvinfer1::ITensor& tensor, nvinfer1::Dims shape); // Helper function to map attributes to a TRT scale layer -NodeImportResult scaleHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs scaleHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, nvinfer1::ITensor& tensor_, nvinfer1::ScaleMode mode, nvinfer1::Weights const& shift, nvinfer1::Weights const& scale, nvinfer1::Weights const& power, char const* shiftName, char const* scaleName); // Helper function to set an ONNX attribute -void setAttr( - nvinfer1::Dims* trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int32_t nbSpatialDims, int32_t defaultVal); +void setAttr(nvinfer1::Dims* trtAttr, ::ONNX_NAMESPACE::AttributeProto const* onnxAttr, int32_t nbSpatialDims, + int32_t defaultVal); // Helper function to slice away elements on a given axis dimension nvinfer1::ITensor* sliceAcrossAxis( @@ -306,7 +308,7 @@ nvinfer1::ITensor* transposeTensor(ImporterContext* ctx, const ::ONNX_NAMESPACE: ::ONNX_NAMESPACE::TensorProto_DataType trtDataTypeToONNX(nvinfer1::DataType dt); // Helper function to import ONNX unary ops into TRT -NodeImportResult unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs unaryHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, TensorOrWeights& input, nvinfer1::UnaryOperation op); // Helper function to unsqueeze tensors on a given set of axes @@ -319,10 +321,10 @@ nvinfer1::ITensor* resizeShapeTensor(ImporterContext* ctx, nvinfer1::ITensor& in // Helper function to convert a ShapedWeights object into a vector template -Status weightsToVector(TensorOrWeights weights, std::vector* weightVector) +void weightsToVector(TensorOrWeights weights, std::vector* weightVector) { - ASSERT(weights.is_weights(), ErrorCode::kUNSUPPORTED_NODE); - ASSERT((weights.weights().type == ::ONNX_NAMESPACE::TensorProto::INT32) + ONNXTRT_CHECK(weights.is_weights(), 
ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK((weights.weights().type == ::ONNX_NAMESPACE::TensorProto::INT32) || (weights.weights().type == ::ONNX_NAMESPACE::TensorProto::INT64) || (weights.weights().type == ::ONNX_NAMESPACE::TensorProto::BOOL) || (weights.weights().type == ::ONNX_NAMESPACE::TensorProto::FLOAT), @@ -348,11 +350,10 @@ Status weightsToVector(TensorOrWeights weights, std::vector* weightV auto array_start = static_cast(weights.weights().values); std::copy(array_start, array_start + weights.weights().count(), weightVector->begin()); } - return Status(ErrorCode::kSUCCESS); } -NodeImportResult staticSliceImporter(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, - size_t const nodeIdx, std::vector& inputs, nvinfer1::ITensor& data); +NodeOutputs staticSliceImporter(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, + std::vector& inputs, nvinfer1::ITensor& data); // Helper function to convert ONNX node name. If no node name, using name of first output. std::string const getNodeName(::ONNX_NAMESPACE::NodeProto const& node); @@ -375,7 +376,7 @@ nvinfer1::ITensor* addSoftmax( ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, nvinfer1::ITensor& input); //! Helper function to import ONNX scatter nodes into TRT -NodeImportResult addScatterLayer(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, +NodeOutputs addScatterLayer(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx, std::vector& inputs, nvinfer1::ScatterMode mode, int32_t axis = 0); //! Helper function to calculate mod(A, B), A & B are integers @@ -408,6 +409,9 @@ class NameScope Status notInvalidType(TensorOrWeights const& input, std::vector const& invalidTypes, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx); +void checkNotInvalidType(TensorOrWeights const& input, std::vector const& invalidTypes, + ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx); + void processMetadata(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILayer* layer); //! Helper function to process ellipsis and implicit output in Einsum @@ -422,7 +426,7 @@ void processMetadata(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& no //! 3. replace ellipsis with new subscripts for each input/output string when the equation contains ellipsis; //! 4. rebuild the einsum equation string with explicit output. //! -Status processEllipsisAndImplicitOutput( +void processEllipsisAndImplicitOutput( std::vector const& inputTensors, std::string& equation, bool const withEllipsis); //! Helper function to parse the Einsum layer with more than 2 inputs as a graph with multiple 2-input Einsum layers. 
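
weightsToVector, just above, keeps its conversion logic intact while swapping Status returns for throwing checks: validate the source type, size the output, then std::copy with the matching element type. A compilable simplification of that dispatch, using a toy Weights struct in place of ShapedWeights/TensorOrWeights (the struct, enum, and error type below are assumptions for illustration, not the parser's real types):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <vector>

    enum class WeightsType
    {
        kINT32,
        kINT64,
        kFLOAT
    };

    // Toy stand-in for ShapedWeights: a type tag over an untyped buffer.
    struct Weights
    {
        WeightsType type;
        void const* values;
        int64_t count;
    };

    template <typename T>
    std::vector<T> weightsToVector(Weights const& w)
    {
        std::vector<T> out(w.count);
        switch (w.type)
        {
        case WeightsType::kINT64:
        {
            auto const* src = static_cast<int64_t const*>(w.values);
            std::copy(src, src + w.count, out.begin());
            break;
        }
        case WeightsType::kINT32:
        {
            auto const* src = static_cast<int32_t const*>(w.values);
            std::copy(src, src + w.count, out.begin());
            break;
        }
        case WeightsType::kFLOAT:
        {
            auto const* src = static_cast<float const*>(w.values);
            std::copy(src, src + w.count, out.begin());
            break;
        }
        default:
            // The patched template throws (via ONNXTRT_CHECK) instead of returning a Status here.
            throw std::runtime_error("Unsupported weights type");
        }
        return out;
    }

    int main()
    {
        int64_t const axes[] = {0, 2, 3};
        Weights w{WeightsType::kINT64, axes, 3};
        for (int32_t v : weightsToVector<int32_t>(w)) // INT64 initializer narrowed, as the reduce-axes path does
        {
            std::cout << v << " "; // prints: 0 2 3
        }
        std::cout << "\n";
        return 0;
    }
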
diff --git a/onnxOpCheckers.cpp b/onnxOpCheckers.cpp
index 103df3a..5d5d482 100644
--- a/onnxOpCheckers.cpp
+++ b/onnxOpCheckers.cpp
@@ -976,10 +976,7 @@ DEFINE_OP_CHECKER(RegexFullMatch)
     STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex);
 }
 
-DEFINE_OP_CHECKER(STFT)
-{
-    STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex);
-}
+DEFINE_OP_EMPTY_CHECKER(STFT)
 
 DEFINE_OP_CHECKER(SequenceAt)
 {
diff --git a/onnxOpImporters.cpp b/onnxOpImporters.cpp
index 45d19f1..27d711f 100644
--- a/onnxOpImporters.cpp
+++ b/onnxOpImporters.cpp
@@ -17,6 +17,7 @@
 #include "RNNHelpers.hpp"
 #include "ShapeTensor.hpp"
 #include "bfloat16.hpp"
+#include "errorHelpers.hpp"
 #include "half.h"
 #include "importerUtils.hpp"
 #include "onnxOpImporters.hpp"
@@ -27,6 +28,7 @@
 #include
 #include
 #include <numeric> // For std::iota
+#include
 #include
 #include
 
@@ -60,22 +62,22 @@ using nvinfer1::DataType;
 }
 
 #define DECLARE_BUILTIN_OP_IMPORTER(op) \
-    NodeImportResult import##op(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, \
+    NodeOutputs import##op(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, \
         std::vector<TensorOrWeights>& inputs)
 
 #define DEFINE_BUILTIN_OP_IMPORTER(op) \
-    NodeImportResult import##op(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, \
+    NodeOutputs import##op(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, \
         std::vector<TensorOrWeights>& inputs); \
     static bool const op##_registered_builtin_op = registerBuiltinOpImporter(#op, import##op); \
     IGNORE_UNUSED_GLOBAL(op##_registered_builtin_op); \
-    NodeImportResult import##op(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, \
+    NodeOutputs import##op(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, \
         std::vector<TensorOrWeights>& inputs)
 
 #define RETURN_FIRST_OUTPUT(layer, node, nodeIdx) \
     do \
     { \
         nvinfer1::ILayer* layer_ptr = layer; \
-        ASSERT_NODE(layer_ptr, "Input layer is null.", node, nodeIdx, ErrorCode::kINVALID_NODE); \
+        ONNXTRT_CHECK_NODE(layer_ptr, "Input layer is null.", node, nodeIdx, ErrorCode::kINVALID_NODE); \
         auto* output = N_CHECK(layer->getOutput(0)); \
         return {{output}}; \
     } while (0)
 
@@ -84,7 +86,7 @@ using nvinfer1::DataType;
     do \
     { \
         TensorOrWeights output = identity(ctx, input); \
-        ASSERT_NODE(output, "Failed to add an identity layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); \
+        ONNXTRT_CHECK_NODE(output, "Failed to add an identity layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); \
         return {{output}}; \
     } while (0)
 
@@ -92,7 +94,7 @@ using nvinfer1::DataType;
     do \
     { \
         nvinfer1::ILayer* layer_ptr = layer; \
-        ASSERT_NODE(layer_ptr, "The input layer is null.", node, nodeIdx, ErrorCode::kINVALID_NODE); \
+        ONNXTRT_CHECK_NODE(layer_ptr, "The input layer is null.", node, nodeIdx, ErrorCode::kINVALID_NODE); \
         std::vector<TensorOrWeights> outputs; \
         for (int i = 0; i < layer_ptr->getNbOutputs(); ++i) \
             outputs.push_back(N_CHECK(layer_ptr->getOutput(i))); \
@@ -198,7 +200,7 @@ DEFINE_BUILTIN_OP_IMPORTER(AveragePool)
     return poolingHelper(ctx, node, nodeIdx, inputs, nvinfer1::PoolingType::kAVERAGE);
 }
 
-NodeImportResult batchnormFallback(
+NodeOutputs batchnormFallback(
     ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, std::vector<TensorOrWeights>& inputs)
 {
     using eOp = nvinfer1::ElementWiseOperation;
@@ -255,7 +257,8 @@ NodeImportResult batchnormFallback(
     nvinfer1::ITensor* divisor = getUnaryResult(ctx, *getElementWiseResult(ctx, *variance,
*epsilon, eOp::kSUM), uOp::kSQRT); nvinfer1::ITensor* dividend = getElementWiseResult(ctx, input, *mean, eOp::kSUB); - auto intermediateResult = getElementWiseResult(ctx, *scale, *getElementWiseResult(ctx, *dividend, *divisor, eOp::kDIV), eOp::kPROD); + auto intermediateResult + = getElementWiseResult(ctx, *scale, *getElementWiseResult(ctx, *dividend, *divisor, eOp::kDIV), eOp::kPROD); nvinfer1::IElementWiseLayer* layer = N_CHECK(ctx->network()->addElementWise(*intermediateResult, *bias, eOp::kSUM)); ctx->registerLayer(layer, node); @@ -264,7 +267,7 @@ NodeImportResult batchnormFallback( } template -NodeImportResult batchnormWeightHelper( +NodeOutputs batchnormWeightHelper( ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, std::vector& inputs) { auto const scale = inputs.at(1).weights(); @@ -294,7 +297,7 @@ NodeImportResult batchnormWeightHelper( // Validate that all the weights have the same amount of values bool allSame = scale.count() == bias.count() && mean.count() == scale.count() && variance.count() == scale.count() && combinedScale.count() == scale.count() && combinedBias.count() == scale.count(); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( allSame, "Inputs to BatchNormalization must have the same shape!", node, nodeIdx, ErrorCode::kINVALID_NODE); for (int32_t i = 0; i < nbChannels; ++i) @@ -309,13 +312,13 @@ NodeImportResult batchnormWeightHelper( DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization) { - ASSERT_NODE((inputs.at(1).shape().nbDims == 1), "The shape of the scale input must be (C, )", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(1).shape().nbDims == 1), "The shape of the scale input must be (C, )", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputs.at(2).shape().nbDims == 1), "The shape of the bias input must be (C, )", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(2).shape().nbDims == 1), "The shape of the bias input must be (C, )", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputs.at(3).shape().nbDims == 1), "The shape of the mean input must be (C, )", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(3).shape().nbDims == 1), "The shape of the mean input must be (C, )", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputs.at(4).shape().nbDims == 1), "The shape of the var input must be (C, )", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(4).shape().nbDims == 1), "The shape of the var input must be (C, )", node, nodeIdx, ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); @@ -344,7 +347,7 @@ DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization) { return batchnormWeightHelper(ctx, node, nodeIdx, inputs); } - ASSERT_NODE(false, "Invalid data type provided for BatchNormalization", node, nodeIdx, + ONNXTRT_CHECK_NODE(false, "Invalid data type provided for BatchNormalization", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DATATYPE); } @@ -373,7 +376,7 @@ DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization) // Validate that all the weights have the same amount of values bool allSame = scale.count() == bias.count() && mean.count() == scale.count() && variance.count() == scale.count() && combinedScale.count() == scale.count() && combinedBias.count() == scale.count(); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( allSame, "Inputs to BatchNormalization must have the same shape!", node, nodeIdx, ErrorCode::kINVALID_NODE); for (int32_t i = 0; i < nbChannels; ++i) @@ -406,14 +409,14 @@ DEFINE_BUILTIN_OP_IMPORTER(BlackmanWindow) OnnxAttrs attrs(node, ctx); int32_t outputDtype = attrs.get("output_datatype", 1); int32_t periodic = attrs.get("periodic", 
@@ -406,14 +409,14 @@ DEFINE_BUILTIN_OP_IMPORTER(BlackmanWindow)
     OnnxAttrs attrs(node, ctx);
     int32_t outputDtype = attrs.get<int32_t>("output_datatype", 1);
     int32_t periodic = attrs.get<int32_t>("periodic", 1);
-    ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE);
+    ONNXTRT_CHECK_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE);
 
     constexpr float alpha = 0.42F;
     constexpr float beta = 0.5F;
     constexpr float gamma = 0.08F;
 
     auto* N = &convertToTensor(inputs.at(0), ctx);
-    ASSERT_NODE(
+    ONNXTRT_CHECK_NODE(
         N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE);
 
     auto* window = generateWindow(ctx, N);
@@ -452,7 +455,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Cast)
     auto onnxType = attrs.get<int32_t>("to");
     DataType newType{DataType::kFLOAT};
     LOG_VERBOSE("Casting to type: " << newType);
-    ASSERT_NODE(convertDtype(onnxType, &newType), "Unsupported cast!", node, nodeIdx, ErrorCode::kINVALID_NODE);
+    ONNXTRT_CHECK_NODE(convertDtype(onnxType, &newType), "Unsupported cast!", node, nodeIdx, ErrorCode::kINVALID_NODE);
 
     // Add the layer.
     nvinfer1::ICastLayer* layer = N_CHECK(ctx->network()->addCast(tensor, newType));
@@ -484,7 +487,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu)
     using uOp = nvinfer1::UnaryOperation;
     using eOpInstuctor = std::tuple<int32_t, int32_t, eOp>;
-    ASSERT_NODE((!inputs.empty()), "Inputs vector is empty.", node, nodeIdx, ErrorCode::kINVALID_NODE);
+    ONNXTRT_CHECK_NODE((!inputs.empty()), "Inputs vector is empty.", node, nodeIdx, ErrorCode::kINVALID_NODE);
     OnnxAttrs attrs(node, ctx);
     TensorOrWeights input = inputs.at(0);
     float alpha = attrs.get<float>("alpha", 1.0);
 
@@ -516,15 +519,15 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu)
         auto* tensor_ptr = &convertToTensor(i, ctx);
 
         // Broadcast all input tensors to size of maxNbDims
-        CHECK_STATUS(broadcastTensor(ctx, tensor_ptr, maxNbDims));
-        ASSERT_NODE(tensor_ptr->getDimensions().nbDims == maxNbDims, "Failed to broadcast tensors elementwise!", node,
-            nodeIdx, ErrorCode::kUNSUPPORTED_NODE);
+        broadcastTensor(ctx, tensor_ptr, maxNbDims);
+        ONNXTRT_CHECK_NODE(tensor_ptr->getDimensions().nbDims == maxNbDims, "Failed to broadcast tensors elementwise!",
+            node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE);
         inputTensors.push_back(tensor_ptr);
     }
 
     // Calculate (x/alpha)
     std::vector<TensorOrWeights> tempInputs{newInputs[0], newInputs[3]};
-    CHECK_STATUS(elementwiseCheck(tempInputs, eOp::kDIV, node, nodeIdx));
+    elementwiseCheck(tempInputs, eOp::kDIV, node, nodeIdx);
     nvinfer1::ITensor* combined = inputTensors.at(0);
     auto* divLayer = N_CHECK(ctx->network()->addElementWise(*combined, *inputTensors.at(3), eOp::kDIV));
     ctx->registerLayer(divLayer, node);
@@ -555,8 +558,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu)
         nvinfer1::ITensor* secondTensor = inputTensors.at(std::get<1>(it));
         eOp const op = std::get<2>(it);
         tempInputs = {firstTensor, secondTensor};
-        CHECK_STATUS(elementwiseCheck(tempInputs, op, node, nodeIdx));
-        ASSERT_NODE((firstTensor->getDimensions().nbDims == secondTensor->getDimensions().nbDims),
+        elementwiseCheck(tempInputs, op, node, nodeIdx);
+        ONNXTRT_CHECK_NODE((firstTensor->getDimensions().nbDims == secondTensor->getDimensions().nbDims),
            "The rank of operands should be the same adding inputs. 
First tensor rank is " << firstTensor->getDimensions().nbDims << ", but second tensor rank is " << secondTensor->getDimensions().nbDims << ".", @@ -570,7 +573,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu) // Helper function to perform clip through elementwise operations template -NodeImportResult elementwiseClipHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, +NodeOutputs elementwiseClipHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, size_t numInputs, int32_t onnxType) { OnnxAttrs attrs(node, ctx); @@ -612,8 +615,8 @@ NodeImportResult elementwiseClipHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::N // Now that we have alphaT and betaT, do the elementwise calculation using eOp = nvinfer1::ElementWiseOperation; - CHECK_STATUS(broadcastTensors(ctx, input, alphaT)); - CHECK_STATUS(broadcastTensors(ctx, input, betaT)); + broadcastTensors(ctx, input, alphaT); + broadcastTensors(ctx, input, betaT); auto* lowerClipLayer = N_CHECK(ctx->network()->addElementWise(*input, *alphaT, eOp::kMAX)); auto* lowerClip = N_CHECK(lowerClipLayer->getOutput(0)); auto* upperClipLayer = N_CHECK(ctx->network()->addElementWise(*lowerClip, *betaT, eOp::kMIN)); @@ -623,7 +626,7 @@ NodeImportResult elementwiseClipHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::N DEFINE_BUILTIN_OP_IMPORTER(Clip) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); // For INT32 and multi-input clips, use elementwise operators instead. size_t numInputs = inputs.size(); bool elementwiseClip = inputs.at(0).isInt32() || inputs.at(0).isInt64(); @@ -634,8 +637,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Clip) if (elementwiseClip) { auto type = convertToTensor(inputs.at(0), ctx).getType(); - ASSERT_NODE((type == DataType::kFLOAT || type == DataType::kHALF || type == DataType::kBF16 - || type == DataType::kINT32 || type == DataType::kINT64), + ONNXTRT_CHECK_NODE((type == DataType::kFLOAT || type == DataType::kHALF || type == DataType::kBF16 + || type == DataType::kINT32 || type == DataType::kINT64), "This version of TensorRT only supports floating-point, INT32, or INT64 inputs for Clip! The current input " "type is " + getTrtDtypeName(type) + ".", @@ -672,7 +675,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Clip) // Handle "min" node input. if (numInputs == 2) { - ASSERT_NODE(inputs.at(1).is_weights(), "Clip min value must be an initializer!", node, nodeIdx, + ONNXTRT_CHECK_NODE(inputs.at(1).is_weights(), "Clip min value must be an initializer!", node, nodeIdx, ErrorCode::kINVALID_NODE); auto min = inputs.at(1).weights(); alpha = getSingleValueAsFloat(min); @@ -683,7 +686,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Clip) // "min" can be optional if "max" is specified. 
Check for this case here
         if (!inputs.at(1).isNullTensor())
         {
-            ASSERT_NODE(inputs.at(1).is_weights(), "Clip min value must be an initializer!", node, nodeIdx,
+            ONNXTRT_CHECK_NODE(inputs.at(1).is_weights(), "Clip min value must be an initializer!", node, nodeIdx,
                 ErrorCode::kINVALID_NODE);
             auto min = inputs.at(1).weights();
             alpha = getSingleValueAsFloat(min);
@@ -691,7 +694,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Clip)
 
         if (!inputs.at(2).isNullTensor())
         {
-            ASSERT_NODE(inputs.at(2).is_weights(), "Clip max value must be an initializer!", node, nodeIdx,
+            ONNXTRT_CHECK_NODE(inputs.at(2).is_weights(), "Clip max value must be an initializer!", node, nodeIdx,
                 ErrorCode::kINVALID_NODE);
             auto max = inputs.at(2).weights();
             beta = getSingleValueAsFloat(max);
@@ -709,7 +712,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Clip)
 
 DEFINE_BUILTIN_OP_IMPORTER(Concat)
 {
-    CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx));
+    checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx);
     std::vector<nvinfer1::ITensor*> tensors;
     for (auto& input : inputs)
     {
@@ -719,7 +722,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Concat)
     OnnxAttrs attrs(node, ctx);
     int32_t axis = attrs.get<int32_t>("axis");
     int32_t nbDims = inputs.at(0).shape().nbDims;
-    CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx));
+    convertAxis(axis, nbDims, node, nodeIdx);
     auto* layer = N_CHECK(ctx->network()->addConcatenation(tensors.data(), tensors.size()));
     ctx->registerLayer(layer, node);
     layer->setAxis(axis);
@@ -778,6 +781,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Constant)
             return {{convertedWeights}};
         }
     }
+    attrs.get<ShapedWeights>("value");
     return {{attrs.get<ShapedWeights>("value")}};
 }
 
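// For reference: TensorRT convolutions are natively 2D/3D, so the Conv importer
// below handles the 1D case by unsqueezing a trailing unit spatial axis (axes{3}),
// running the 2D convolution path, and squeezing that axis back out once the
// convolution layer has been added.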
Number of " "spatial dimensions = " << nbSpatialDims << ", number of kernel dimensions = " << kernelWeights.shape.nbDims << ".", @@ -845,9 +849,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) { shapedBiasWeights.shape = {1, {1}}; } - ASSERT_NODE((shapedBiasWeights.shape.nbDims == 1), "The bias tensor is required to be 1D.", node, nodeIdx, - ErrorCode::kINVALID_NODE); - ASSERT_NODE((shapedBiasWeights.shape.d[0] == kernelWeights.shape.d[0]), + ONNXTRT_CHECK_NODE((shapedBiasWeights.shape.nbDims == 1), "The bias tensor is required to be 1D.", node, + nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((shapedBiasWeights.shape.d[0] == kernelWeights.shape.d[0]), "The shape of the bias tensor misaligns with the weight tensor. Shape of bias weights = " << shapedBiasWeights.shape.d[0] << ", shape of kernel weights = " << kernelWeights.shape.d[0] << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -869,12 +873,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) nvinfer1::Dims dilations = makeDims(nbSpatialDims, 1); nvinfer1::PaddingMode paddingMode; bool excludePadding; - CHECK_STATUS(getKernelParams( - ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations)); + getKernelParams( + ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations); for (int32_t i = 1; i <= nbSpatialDims; ++i) { - ASSERT_NODE((kernelSize.d[nbSpatialDims - i] == kernelWeights.shape.d[kernelWeights.shape.nbDims - i]), + ONNXTRT_CHECK_NODE((kernelSize.d[nbSpatialDims - i] == kernelWeights.shape.d[kernelWeights.shape.nbDims - i]), "The size of spatial dimension and the size of kernel shape are not equal for the Conv operator. " "Size of spatial dimensions = " << kernelSize.d[nbSpatialDims - i] @@ -894,7 +898,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) layer->setDilationNd(dilations); OnnxAttrs attrs(node, ctx); int32_t ngroup = attrs.get("group", 1); - ASSERT_NODE((nchan == -1 || kernelWeights.shape.d[1] * ngroup == nchan), + ONNXTRT_CHECK_NODE((nchan == -1 || kernelWeights.shape.d[1] * ngroup == nchan), "Kernel weight dimension failed to broadcast to input.", node, nodeIdx, ErrorCode::kINVALID_NODE); layer->setNbGroups(ngroup); // Register layer name as well as kernel weights and bias weights (if any) @@ -912,7 +916,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) // Un-expand spatial dims back to 1D std::vector axes{3}; tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - ASSERT_NODE(tensorPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(tensorPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } LOG_VERBOSE("Using kernel: " << kernelSize << ", strides: " << strides << ", prepadding: " << begPadding @@ -946,7 +950,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) return false; }; - ASSERT_NODE( + ONNXTRT_CHECK_NODE( inputs.size() >= 2, "deconvolution require at least 2 inputs.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); @@ -957,7 +961,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) nvinfer1::Dims dims = tensorPtr->getDimensions(); // Deconvolution input must be at least 3D and at most 5D. - ASSERT_NODE(dims.nbDims >= 3 && dims.nbDims <= 5, + ONNXTRT_CHECK_NODE(dims.nbDims >= 3 && dims.nbDims <= 5, "Deconvolution input must be at least 3D and at most 5D! 
The current input is rank " << dims.nbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -969,7 +973,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) int32_t const nbSpatialDims = dims.nbDims - 2; // Check that the number of spatial dimensions and the kernel shape matches up. - ASSERT_NODE((nbSpatialDims == kernelShape.nbDims - 2), + ONNXTRT_CHECK_NODE((nbSpatialDims == kernelShape.nbDims - 2), "The number of spatial dimensions and the kernel shape doesn't match up. Number of spatial dimensions = " << nbSpatialDims << ", number of kernel dimensions = " << kernelShape.nbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -995,10 +999,10 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) { auto shapedBiasWeights = inputs.at(2).weights(); // ONNX requires shapedBiasWeights to be 1D - ASSERT_NODE(shapedBiasWeights.shape.nbDims == 1, + ONNXTRT_CHECK_NODE(shapedBiasWeights.shape.nbDims == 1, "The bias tensor is required to be 1D. Provided bias has rank " << shapedBiasWeights.shape.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((shapedBiasWeights.shape.d[0] == noutput), + ONNXTRT_CHECK_NODE((shapedBiasWeights.shape.d[0] == noutput), "The number of the bias weights does not align with the number of output maps. Number of bias weights = " << shapedBiasWeights.shape.d[0] << ", number of output maps = " << noutput << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -1016,12 +1020,12 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) kernelSize.d[nbSpatialDims - i] = kernelShape.d[kernelShape.nbDims - i]; } - CHECK_STATUS(getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, - excludePadding, &dilations, &outputPadding)); + getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations, + &outputPadding); for (int32_t i = 1; i <= nbSpatialDims; ++i) { - ASSERT_NODE((kernelSize.d[nbSpatialDims - i] == kernelShape.d[kernelShape.nbDims - i]), + ONNXTRT_CHECK_NODE((kernelSize.d[nbSpatialDims - i] == kernelShape.d[kernelShape.nbDims - i]), "Attribute kernel_shape misaligns with the dimensions of the weight tensor. Number of spatial dimensions = " << kernelSize.d[nbSpatialDims - i] << ", number of kernel dimensions = " << kernelShape.d[kernelShape.nbDims - i] << ".", @@ -1051,7 +1055,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) for (int32_t i = 0; i < nbSpatialDims; i++) { - ASSERT_NODE(begPadding.d[i] >= 0, + ONNXTRT_CHECK_NODE(begPadding.d[i] >= 0, "TensorRT does not support negative pre-padding in the ConvTranspose operator!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Update outputPadding with any negative values in endPadding, and set the corresponding value to 0. @@ -1092,7 +1096,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) // If there is still output padding, remove the bias weights. Bias will be added below. auto* layer = N_CHECK(ctx->network()->addDeconvolutionNd( *tensorPtr, noutput, kernelSize, kernelWeights, hasOutputPadding ? 
emptyBiasWeights : biasWeights)); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setStrideNd(strides); layer->setNbGroups(ngroup); layer->setDilationNd(dilations); @@ -1113,11 +1117,10 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) layer->setPrePadding(begPadding); layer->setPostPadding(endPadding); - LOG_VERBOSE("Running deconvolution with: " - << "\n" - << "Padding mode: " << autoPadMode << "\n" - << "Pre-padding: " << begPadding << "\n" - << "Post-padding: " << endPadding); + LOG_VERBOSE("Running deconvolution with: " << "\n" + << "Padding mode: " << autoPadMode << "\n" + << "Pre-padding: " << begPadding << "\n" + << "Post-padding: " << endPadding); // Register layer, along with refittable kernel weights and bias weights (if any) ctx->registerLayer(layer, node); @@ -1138,9 +1141,10 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) combinePadding.insert(combinePadding.begin(), 0); combinePadding.push_back(outputPadding.d[i]); } - ASSERT_NODE(convertOnnxPadding(ctx, dims.nbDims, combinePadding, start, totalPadding), + ONNXTRT_CHECK_NODE(convertOnnxPadding(ctx, dims.nbDims, combinePadding, start, totalPadding), "Failed to convert padding!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - auto const size = getElementWiseResult(ctx, shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM); + auto const size = getElementWiseResult( + ctx, shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM); auto const stride = makeDims(dims.nbDims, 1); auto const& dummy = stride; auto* sliceLayer = N_CHECK(ctx->network()->addSlice(*tensorPtr, dummy, dummy, stride)); @@ -1156,7 +1160,8 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) auto constantDims = makeDims(dims.nbDims, 1); constantDims.d[dims.nbDims - nbSpatialDims - 1] = biasWeights.count; auto biasConstant = N_CHECK(ctx->network()->addConstant(constantDims, biasWeights)); - tensorPtr = getElementWiseResult(ctx, *tensorPtr, *N_CHECK(biasConstant->getOutput(0)), nvinfer1::ElementWiseOperation::kSUM); + tensorPtr = getElementWiseResult( + ctx, *tensorPtr, *N_CHECK(biasConstant->getOutput(0)), nvinfer1::ElementWiseOperation::kSUM); } } @@ -1169,7 +1174,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) { std::vector axes{3}; tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - ASSERT_NODE(tensorPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(tensorPtr, "Failed to squeeze tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } return {{tensorPtr}}; @@ -1197,7 +1202,7 @@ DEFINE_BUILTIN_OP_IMPORTER(CumSum) assertIsWeights(inputs.at(1), "Axis input for CumSum must be an initializer!"); ShapedWeights axisWeights = inputs.at(1).weights(); int32_t axis = static_cast(axisWeights.values)[0]; - CHECK_STATUS(convertAxis(axis, dims.nbDims, node, nodeIdx)); + convertAxis(axis, dims.nbDims, node, nodeIdx); // Create "inputSliced" tensor that is sliced on dimension[axis] to length 1 auto inputSliced = sliceAcrossAxis(ctx, node, input, axis); @@ -1255,7 +1260,8 @@ DEFINE_BUILTIN_OP_IMPORTER(CumSum) auto runningSum = loop->addRecurrence(*zeroTensor); auto runningSumTensor = N_CHECK(runningSum->getOutput(0)); - auto curSum = N_CHECK(ctx->network()->addElementWise(*data, *runningSumTensor, nvinfer1::ElementWiseOperation::kSUM)); + auto curSum + = N_CHECK(ctx->network()->addElementWise(*data, *runningSumTensor, 
nvinfer1::ElementWiseOperation::kSUM)); auto* curSumOutput = N_CHECK(curSum->getOutput(0)); runningSum->setInput(1, *curSumOutput); @@ -1272,11 +1278,11 @@ DEFINE_BUILTIN_OP_IMPORTER(DeformConv) auto weightDataType = inputs.at(1).getDataType(); auto offsetDataType = inputs.at(2).getDataType(); - ASSERT_NODE((inputDataType == DataType::kFLOAT || inputDataType == DataType::kHALF), + ONNXTRT_CHECK_NODE((inputDataType == DataType::kFLOAT || inputDataType == DataType::kHALF), "Inputs must be either FLOAT or FLOAT16. Input type is " + getTrtDtypeName(inputDataType) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputDataType == weightDataType && inputDataType == offsetDataType), + ONNXTRT_CHECK_NODE((inputDataType == weightDataType && inputDataType == offsetDataType), "Inputs must be either all FLOAT or all FLOAT16. Input type = " + getTrtDtypeName(inputDataType) + ", weight type = " + getTrtDtypeName(weightDataType) + ", offset type = " + getTrtDtypeName(offsetDataType) + ".", @@ -1285,7 +1291,7 @@ DEFINE_BUILTIN_OP_IMPORTER(DeformConv) if (inputs.size() > 3) { auto biasDataType = inputs.at(3).getDataType(); - ASSERT_NODE((inputDataType == biasDataType), + ONNXTRT_CHECK_NODE((inputDataType == biasDataType), "Inputs must be either all FLOAT or all FLOAT16. Input type = " + getTrtDtypeName(inputDataType) + ", bias type = " + getTrtDtypeName(biasDataType) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -1294,7 +1300,7 @@ DEFINE_BUILTIN_OP_IMPORTER(DeformConv) if (inputs.size() > 4) { auto maskDataType = inputs.at(4).getDataType(); - ASSERT_NODE((inputDataType == maskDataType), + ONNXTRT_CHECK_NODE((inputDataType == maskDataType), "Inputs must be either all FLOAT or all FLOAT16. Input type = " + getTrtDtypeName(inputDataType) + ", mask type = " + getTrtDtypeName(maskDataType) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -1305,9 +1311,9 @@ DEFINE_BUILTIN_OP_IMPORTER(DeformConv) DEFINE_BUILTIN_OP_IMPORTER(DepthToSpace) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx); // Input tensor is in NCHW format - ASSERT_NODE((inputs.at(0).shape().nbDims == 4), "The input tensor must be in NCHW format.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(0).shape().nbDims == 4), "The input tensor must be in NCHW format.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); @@ -1404,15 +1410,15 @@ ShapedWeights getWeightsFromIdentityOrConstant(nvinfer1::INetworkDefinition& net } // This is a helper function for QuantizeLinear/DequantizeLinear -NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, +NodeOutputs QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, std::vector& inputs, bool isDQ, bool isCustomOp, DataType customOpType = DataType::kFP8) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); // For QuantizeLinear, the output type (and thus quantization type) is dependent on the second input (zero point). 
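// For reference: TensorRT supports symmetric quantization only, so the Q/DQ pair
// imported by this helper reduces to, roughly,
//
//     q  = clamp(round(x / scale))   // quantize, e.g. to [-128, 127] for INT8
//     x' = q * scale                 // dequantize; the zero point must be all zeros
//
// which is why a nonzero zero_point input is rejected further down.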
if (!isDQ && inputs.size() >= 3) { - CHECK_STATUS(notInvalidType(inputs.at(2), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(2), {"UINT8"}, node, nodeIdx); } auto addConstantLayer = [ctx, node](nvinfer1::INetworkDefinition& network, ShapedWeights const& weights) -> nvinfer1::ITensor* { @@ -1427,7 +1433,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE }; // Read the optional quantization axis attribute. Set it to the rank of the input tensor if not provided - ASSERT_NODE((inputs.size() >= 2), + ONNXTRT_CHECK_NODE((inputs.size() >= 2), "This version of TensorRT requires at least 2 inputs for the QuantizeLinear/DequantizeLinear operator.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -1441,7 +1447,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE { // Scale is concrete so verify it now. auto scale = inputs.at(1).weights(); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( scale.count() > 0, "Cannot have scale with no coefficients.", node, nodeIdx, ErrorCode::kINVALID_NODE); bool scaleAllPositive = false; @@ -1461,7 +1467,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE auto const* scaleVal = static_cast(scale.values); scaleAllPositive = std::all_of(scaleVal, scaleVal + scale.count(), [](BFloat16 x) { return x > 0; }); } - ASSERT_NODE( + ONNXTRT_CHECK_NODE( scaleAllPositive, "Scale coefficients must all be positive", node, nodeIdx, ErrorCode::kINVALID_NODE); // If the scale is concrete weights, then add a ConstantLayer that will be an input which @@ -1476,7 +1482,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE auto const& scaleDims = scaleInput->getDimensions(); auto const& scaleType = scaleInput->getType(); - ASSERT_NODE(!isDynamic(scaleDims), "Dynamic shape for scale tensor is not supported.", node, nodeIdx, + ONNXTRT_CHECK_NODE(!isDynamic(scaleDims), "Dynamic shape for scale tensor is not supported.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DYNAMIC); auto const& scaleSize = volume(scaleDims); @@ -1485,7 +1491,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE // ONNX default is UINT8, TRT will default to INT8 as TRT doesn't allow UINT8 quantization // When importing CustomOp FP8/INT4 Q/DQ, default to FP8/INT4 DataType chosenDataType = isCustomOp ? 
customOpType : DataType::kINT8; - ASSERT_NODE(!isCustomOp || customOpType == DataType::kFP8 || customOpType == DataType::kINT4, + ONNXTRT_CHECK_NODE(!isCustomOp || customOpType == DataType::kFP8 || customOpType == DataType::kINT4, "Custom QDQ ops are available only for FP8 and INT4", node, nodeIdx, ErrorCode::kINTERNAL_ERROR); OnnxAttrs attrs(node, ctx); @@ -1495,7 +1501,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE if (isOutputDtypeSet) { isOutputDtypeSet = convertDtype(outputDTypeOnnx, &outputDtype); - ASSERT_NODE(isOutputDtypeSet, + ONNXTRT_CHECK_NODE(isOutputDtypeSet, "Attribute output_dtype specifies an unsupported data type " << outputDtype << ".", node, nodeIdx, nvonnxparser::ErrorCode::kUNSUPPORTED_NODE); } @@ -1504,7 +1510,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE { // ONNX spec definition is that when zero point is set, use its datatype for quantization DataType zeroPointDataType = inputs.at(2).getDataType(); - ASSERT_NODE(!isOutputDtypeSet || outputDtype == zeroPointDataType, + ONNXTRT_CHECK_NODE(!isOutputDtypeSet || outputDtype == zeroPointDataType, "Mismatch between attribute output_dtype " << outputDtype << " and zero-point data type " << zeroPointDataType << ".", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -1537,7 +1543,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE else { zeroPoint = zeroPtInput.weights(); - ASSERT_NODE(zeroPoint.values, + ONNXTRT_CHECK_NODE(zeroPoint.values, "QuantizeLinear/DequantizeLinear operator must contains all zeros values.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); } @@ -1549,7 +1555,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE else { // Create new constant for zero input. - ASSERT_NODE(shiftIsAllZeros(zeroPoint), + ONNXTRT_CHECK_NODE(shiftIsAllZeros(zeroPoint), "TensorRT only supports symmetric quantization. The zero point for the " "QuantizeLinear/DequantizeLinear operator must be all zeros.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -1564,7 +1570,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE { auto const zeroPointSize = volume(zeroPointInput->getDimensions()); // ONNX may represent a scalar using either 0-D or 1-D, so compare sizes instead of shapes. - ASSERT_NODE(zeroPointSize == scaleSize, + ONNXTRT_CHECK_NODE(zeroPointSize == scaleSize, "The scale and zero point must have the same volume. 
Size of zero point = " << zeroPointSize << ", size of the scale = " << scaleSize << ".", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -1573,14 +1579,15 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE } else if (isOutputDtypeSet) { - ASSERT_NODE(outputDtype == DataType::kFP8 || outputDtype == DataType::kINT8 || outputDtype == DataType::kINT4, + ONNXTRT_CHECK_NODE( + outputDtype == DataType::kFP8 || outputDtype == DataType::kINT8 || outputDtype == DataType::kINT4, "Attribute output_dtype specifies an invalid data type " << outputDtype << ".", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); chosenDataType = outputDtype; } int32_t axis = attrs.get("axis", inputDims.nbDims); - CHECK_STATUS(convertAxis(axis, inputDims.nbDims, node, nodeIdx)); + convertAxis(axis, inputDims.nbDims, node, nodeIdx); if (scaleSize != 1) { @@ -1595,7 +1602,7 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE { // Ensure that number of scale-coefficients is equal to the number of output channels. int64_t const K = dataInput->getDimensions().d[axis]; - ASSERT_NODE(K == scaleSize, + ONNXTRT_CHECK_NODE(K == scaleSize, "The number of scales is not equal to the number of output channels. Number of output channels = " << K << ", number of scales = " << scaleSize << ".", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -1611,12 +1618,12 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE inputDims.d, inputDims.d + rank, scaleDims.d, blockDims.begin(), std::divides()); auto equals_one = [](int32_t i) { return i == 1; }; - ASSERT_NODE(std::count_if(blockDims.begin(), blockDims.end(), equals_one) == rank - 1, + ONNXTRT_CHECK_NODE(std::count_if(blockDims.begin(), blockDims.end(), equals_one) == rank - 1, "Only a single blocking dimension is allowed.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); auto const inputSize = volume(inputDims); - ASSERT_NODE(inputSize % scaleSize == 0, + ONNXTRT_CHECK_NODE(inputSize % scaleSize == 0, "Inferred block size is not an integer. Input volume = " << inputSize << ", scale volume = " << scaleSize << ".", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -1624,8 +1631,8 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE } else { - ASSERT_NODE(false, "Invalid rank for the scale tensor. Rank = " << scaleDims.nbDims << ".", node, nodeIdx, - nvonnxparser::ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(false, "Invalid rank for the scale tensor. Rank = " << scaleDims.nbDims << ".", node, + nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); } } else @@ -1645,13 +1652,13 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE // INT4 requires an even last-dimension due to packing restrictions if (!isDynamic(inputDims)) { - ASSERT_NODE((chosenDataType != DataType::kINT4 || inputDims.d[inputDims.nbDims - 1] % 2 == 0), + ONNXTRT_CHECK_NODE((chosenDataType != DataType::kINT4 || inputDims.d[inputDims.nbDims - 1] % 2 == 0), "Quantization to INT4 is not supported for tensors with an odd last dimension.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); } nvinfer1::ILayer* layer = nullptr; - ASSERT_NODE( + ONNXTRT_CHECK_NODE( (chosenDataType == DataType::kINT8 || chosenDataType == DataType::kFP8 || chosenDataType == DataType::kINT4), "TensorRT only allows FP8, INT8, and INT4 quantization. 
The requested quantization type is" + getTrtDtypeName(chosenDataType) + ".", @@ -1769,13 +1776,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Dropout) // We can deal with the cases where training_mode is an initializer. if (ctx->getOpsetVersion() >= 12 && node.input().size() == 3) { - ASSERT_NODE(inputs.at(2).is_weights(), + ONNXTRT_CHECK_NODE(inputs.at(2).is_weights(), "This Version of TensorRT only supports the training_mode input as an initializer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); std::vector trainingMode; - CHECK_STATUS(weightsToVector(inputs.at(2).weights(), &trainingMode)); - ASSERT_NODE(!trainingMode[0], "TensorRT does not support the Dropout operator in training mode.", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + weightsToVector(inputs.at(2).weights(), &trainingMode); + ONNXTRT_CHECK_NODE(!trainingMode[0], "TensorRT does not support the Dropout operator in training mode.", node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } size_t noutputs = node.output().size(); @@ -1808,11 +1815,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Dropout) DEFINE_BUILTIN_OP_IMPORTER(Einsum) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); OnnxAttrs attrs(node, ctx); std::string equation = attrs.get("equation"); - ASSERT_NODE((!inputs.empty()), "Inputs vector is empty.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((!inputs.empty()), "Inputs vector is empty.", node, nodeIdx, ErrorCode::kINVALID_NODE); std::vector inputTensors; @@ -1832,7 +1839,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Einsum) if (withEllipsis || nbInputs > 2) { LOG_VERBOSE("Equation before preprocessing ellipsis and output: " << equation); - CHECK_STATUS(processEllipsisAndImplicitOutput(inputTensors, equation, withEllipsis)); + processEllipsisAndImplicitOutput(inputTensors, equation, withEllipsis); LOG_VERBOSE("Equation after preprocessing ellipsis and output: " << equation); } @@ -1874,14 +1881,14 @@ DEFINE_BUILTIN_OP_IMPORTER(Exp) DEFINE_BUILTIN_OP_IMPORTER(Expand) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); // "Broadcast the input tensor following the given shape and the broadcast rule." nvinfer1::ITensor& inputTensor = convertToTensor(inputs.at(0), ctx); auto const inputDims = shapeOf(inputTensor); auto const inputRank = shapeOf(inputDims); // "A 1-D tensor indicates the shape you want to expand to, following the broadcast rule" - ASSERT_NODE((inputs.at(1).shape().nbDims == 1), "The shape tensor is required to be 1D.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(1).shape().nbDims == 1), "The shape tensor is required to be 1D.", node, nodeIdx, ErrorCode::kINVALID_VALUE); ShapeTensor shape{ctx, inputs.at(1)}; auto const shapeLength = shapeOf(shape); @@ -1909,7 +1916,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Expand) DEFINE_BUILTIN_OP_IMPORTER(EyeLike) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); // Get input node. nvinfer1::ITensor& tensor = convertToTensor(inputs.at(0), ctx); OnnxAttrs attrs(node, ctx); @@ -1917,7 +1924,7 @@ DEFINE_BUILTIN_OP_IMPORTER(EyeLike) // "Only 2D tensors are supported, i.e. input T1 must be of rank 2..." nvinfer1::Dims dims = tensor.getDimensions(); - ASSERT_NODE(dims.nbDims == 2, "Only 2D tensors are supported. Input must be of rank 2.", node, nodeIdx, + ONNXTRT_CHECK_NODE(dims.nbDims == 2, "Only 2D tensors are supported. 
Input must be of rank 2.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // The data type can be specified by the 'dtype' argument @@ -1925,12 +1932,13 @@ DEFINE_BUILTIN_OP_IMPORTER(EyeLike) if (attrs.count("dtype")) { auto onnxType = attrs.get("dtype"); - ASSERT_NODE(convertDtype(onnxType, &dtype), "Unsupported cast!", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + convertDtype(onnxType, &dtype), "Unsupported cast!", node, nodeIdx, ErrorCode::kINVALID_NODE); LOG_VERBOSE("Casting to type: " << dtype); } // Create weights and constant layer - ASSERT_NODE(!isDynamic(dims), "Eyelike does not work for dynamically shaped tensors.", node, nodeIdx, + ONNXTRT_CHECK_NODE(!isDynamic(dims), "Eyelike does not work for dynamically shaped tensors.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); int totalWeights = dims.d[0] * dims.d[1]; std::vector values(totalWeights); @@ -1965,7 +1973,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Flatten) nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); int32_t nbDims = tensorPtr->getDimensions().nbDims; int32_t axis = attrs.get("axis", 1); - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); // No-op Flatten: (a, b) => Flatten(axis = 1) => (a, b) // Add identity layer to avoid name mangling of engine bindings @@ -1976,7 +1984,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Flatten) } tensorPtr = flattenTensor(ctx, node, *tensorPtr, axis, true); - ASSERT_NODE(tensorPtr, "Failed to flatten the tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(tensorPtr, "Failed to flatten the tensor.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); return {{tensorPtr}}; } @@ -1987,13 +1995,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Floor) DEFINE_BUILTIN_OP_IMPORTER(Gather) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* indices = &convertToTensor(inputs.at(1), ctx); OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", 0); int32_t nbDims = inputs.at(0).shape().nbDims; - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); LOG_VERBOSE("Using Gather axis: " << axis); if (inputs.at(0).getType() != "INT64" && inputs.at(1).getType() == "INT64") @@ -2008,7 +2016,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Gather) DEFINE_BUILTIN_OP_IMPORTER(GatherElements) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* indices = &convertToTensor(inputs.at(1), ctx); nvinfer1::Dims const& dataDims = data.getDimensions(); @@ -2016,7 +2024,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GatherElements) OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", 0); int32_t const dataNbDims = dataDims.nbDims; - CHECK_STATUS(convertAxis(axis, dataNbDims, node, nodeIdx)); + convertAxis(axis, dataNbDims, node, nodeIdx); LOG_VERBOSE("Using Gather axis: " << axis); if (inputs.at(0).getType() != "INT64") @@ -2032,7 +2040,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GatherElements) DEFINE_BUILTIN_OP_IMPORTER(GatherND) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* indices = &convertToTensor(inputs.at(1), ctx); @@ -2045,7 +2053,7 @@ 
DEFINE_BUILTIN_OP_IMPORTER(GatherND) indices = castHelper(ctx, indices, DataType::kINT32); } auto* layer = ctx->network()->addGatherV2(data, *indices, nvinfer1::GatherMode::kND); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setNbElementWiseDims(nbElementWiseDims); ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer, node, nodeIdx); @@ -2065,14 +2073,14 @@ DEFINE_BUILTIN_OP_IMPORTER(Gelu) } else { - ASSERT_NODE(false, "Invalid value provided for the Gelu \'approximate\' attribute: " << approximate, node, - nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); + ONNXTRT_CHECK_NODE(false, "Invalid value provided for the Gelu \'approximate\' attribute: " << approximate, + node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); } } DEFINE_BUILTIN_OP_IMPORTER(Gemm) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"INT32", "INT64"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"INT32", "INT64"}, node, nodeIdx); OnnxAttrs attrs(node, ctx); float alpha = attrs.get("alpha", 1.f); float beta = attrs.get("beta", 1.f); @@ -2081,7 +2089,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Gemm) nvinfer1::ITensor& inputA = convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor& inputB = convertToTensor(inputs.at(1), ctx); // Validate inputs - ASSERT_NODE(inputA.getDimensions().nbDims == 2 && inputB.getDimensions().nbDims == 2, + ONNXTRT_CHECK_NODE(inputA.getDimensions().nbDims == 2 && inputB.getDimensions().nbDims == 2, "GEMM must have 2D inputs! inputA has rank " << inputA.getDimensions().nbDims << ", inputB has rank " << inputB.getDimensions().nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -2114,9 +2122,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Gemm) nvinfer1::IConstantLayer* alphaConstant = addConstantScalar(ctx, alpha, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT); nvinfer1::ITensor* alphaConstantTensor = N_CHECK(alphaConstant->getOutput(0)); - CHECK_STATUS(broadcastTensors(ctx, alphaConstantTensor, matmulTensor)); - nvinfer1::IElementWiseLayer* scaledMatmul = N_CHECK(ctx->network()->addElementWise( - *alphaConstantTensor, *matmulTensor, nvinfer1::ElementWiseOperation::kPROD)); + broadcastTensors(ctx, alphaConstantTensor, matmulTensor); + nvinfer1::IElementWiseLayer* scaledMatmul = N_CHECK( + ctx->network()->addElementWise(*alphaConstantTensor, *matmulTensor, nvinfer1::ElementWiseOperation::kPROD)); matmulTensor = N_CHECK(scaledMatmul->getOutput(0)); } @@ -2131,12 +2139,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Gemm) nvinfer1::IConstantLayer* betaConstant = addConstantScalar(ctx, beta, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT); nvinfer1::ITensor* betaConstantTensor = N_CHECK(betaConstant->getOutput(0)); - CHECK_STATUS(broadcastTensors(ctx, betaConstantTensor, biasTensor)); + broadcastTensors(ctx, betaConstantTensor, biasTensor); nvinfer1::IElementWiseLayer* scaledBias = N_CHECK(ctx->network()->addElementWise( *betaConstantTensor, *biasTensor, nvinfer1::ElementWiseOperation::kPROD)); biasTensor = N_CHECK(scaledBias->getOutput(0)); } - CHECK_STATUS(broadcastTensors(ctx, matmulTensor, biasTensor)); + broadcastTensors(ctx, matmulTensor, biasTensor); nvinfer1::IElementWiseLayer* biasAdd = N_CHECK(ctx->network()->addElementWise(*matmulTensor, *biasTensor, nvinfer1::ElementWiseOperation::kSUM)); auto output = N_CHECK(biasAdd->getOutput(0)); @@ -2157,7 +2165,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GlobalLpPool) { auto& tensor = convertToTensor(inputs.at(0), ctx); auto inputType = 
tensor.getType(); - ASSERT_NODE((inputType == DataType::kFLOAT || inputType == DataType::kHALF), + ONNXTRT_CHECK_NODE((inputType == DataType::kFLOAT || inputType == DataType::kHALF), "Only FLOAT and HALF are supported in GlobalLpPool. The current type = " + getTrtDtypeName(inputType) + ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::Dims dims = tensor.getDimensions(); @@ -2184,11 +2192,13 @@ DEFINE_BUILTIN_OP_IMPORTER(GlobalLpPool) } // firstPow = pow(x, p) - auto* firstPow = getElementWiseResult(ctx, tensor, *N_CHECK(pLayer->getOutput(0)), nvinfer1::ElementWiseOperation::kPOW); + auto* firstPow + = getElementWiseResult(ctx, tensor, *N_CHECK(pLayer->getOutput(0)), nvinfer1::ElementWiseOperation::kPOW); // reduced = reduce_sum(firstPow) auto* reduced = globalPoolingHelper(ctx, node, *firstPow, nvinfer1::ReduceOperation::kSUM); // finalPow = pow(reduced, 1./p) - auto* finalPow = getElementWiseResult(ctx, *reduced, *N_CHECK(pInvLayer->getOutput(0)), nvinfer1::ElementWiseOperation::kPOW); + auto* finalPow + = getElementWiseResult(ctx, *reduced, *N_CHECK(pInvLayer->getOutput(0)), nvinfer1::ElementWiseOperation::kPOW); return {{finalPow}}; } @@ -2335,11 +2345,10 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) ::ONNX_NAMESPACE::TensorProto::INT32, Dims{1, {3}})->getOutput(0); auto* weightsZRSizeLayer = N_CHECK(net->addConcatenation( - std::array{{numDirectionsTensor, hiddenSizeDoubledTensor, eDimTensor}}.data(), - 3)); + std::array{{numDirectionsTensor, hiddenSizeDoubledTensor, eDimTensor}}.data(), 3)); nvinfer1::ITensor* weightsZRSize = N_CHECK(weightsZRSizeLayer->getOutput(0)); nvinfer1::ISliceLayer* weightsZRLayer = N_CHECK(net->addSlice(weights, Dims{3}, Dims{3}, Dims3{1, 1, 1})); - ASSERT_NODE(weightsZRLayer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(weightsZRLayer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); weightsZRLayer->setInput(1, *weightsZRStart); weightsZRLayer->setInput(2, *weightsZRSize); nvinfer1::ITensor* weightsZR = N_CHECK(weightsZRLayer->getOutput(0)); @@ -2349,7 +2358,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) ::ONNX_NAMESPACE::TensorProto::INT32, Dims{1, {3}})->getOutput(0); auto weightsHSizeLayer = N_CHECK(net->addConcatenation( - std::array{{numDirectionsTensor, hiddenSizeTensor, eDimTensor}}.data(), 3)); + std::array{{numDirectionsTensor, hiddenSizeTensor, eDimTensor}}.data(), 3)); nvinfer1::ITensor* weightsHSize = N_CHECK(weightsHSizeLayer->getOutput(0)); nvinfer1::ISliceLayer* weightsHLayer = N_CHECK(net->addSlice(weights, Dims{3}, Dims{3}, Dims3{1, 1, 1})); weightsHLayer->setInput(1, *weightsHStart); @@ -2357,12 +2366,12 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) nvinfer1::ITensor* weightsH = N_CHECK(weightsHLayer->getOutput(0)); LOG_VERBOSE("Weights for H gate shape is: " << weightsH->getDimensions()); - auto recurrenceWeightsZRLayer = N_CHECK(net->addSlice(recurrenceWeights, Dims3{0, 0, 0}, - Dims3{numDirections, 2 * hiddenSize, hiddenSize}, Dims3{1, 1, 1})); + auto recurrenceWeightsZRLayer = N_CHECK(net->addSlice( + recurrenceWeights, Dims3{0, 0, 0}, Dims3{numDirections, 2 * hiddenSize, hiddenSize}, Dims3{1, 1, 1})); nvinfer1::ITensor* recurrenceWeightsZR = N_CHECK(recurrenceWeightsZRLayer->getOutput(0)); LOG_VERBOSE("Recurrence weights for ZR gates shape is: " << recurrenceWeightsZR->getDimensions()); - auto recurrenceWeightsHLayer = N_CHECK(net->addSlice(recurrenceWeights, Dims3{0, 2 * hiddenSize, 0}, - Dims3{numDirections, hiddenSize, hiddenSize}, Dims3{1, 1, 1})); + auto recurrenceWeightsHLayer = 
N_CHECK(net->addSlice( + recurrenceWeights, Dims3{0, 2 * hiddenSize, 0}, Dims3{numDirections, hiddenSize, hiddenSize}, Dims3{1, 1, 1})); nvinfer1::ITensor* recurrenceWeightsH = N_CHECK(recurrenceWeightsHLayer->getOutput(0)); LOG_VERBOSE("Recurrence weights for H gate shape is: " << recurrenceWeightsH->getDimensions()); @@ -2382,20 +2391,21 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) unsqueeze->setZeroIsPlaceholder(false); concatenatedBias = N_CHECK(unsqueeze->getOutput(0)); - auto biasZRLayer = N_CHECK(net->addSlice(*concatenatedBias, Dims3{0, 0, 0}, Dims3{numDirections, 1, 2 * hiddenSize}, Dims3{1, 1, 1})); + auto biasZRLayer = N_CHECK( + net->addSlice(*concatenatedBias, Dims3{0, 0, 0}, Dims3{numDirections, 1, 2 * hiddenSize}, Dims3{1, 1, 1})); biasZR = N_CHECK(biasZRLayer->getOutput(0)); LOG_VERBOSE("Bias for ZR gates shape is: " << biasZR->getDimensions()); - auto biasHLayer = N_CHECK(net->addSlice(*concatenatedBias, Dims3{0, 0, 2 * hiddenSize}, Dims3{numDirections, 1, hiddenSize}, - Dims3{1, 1, 1})); + auto biasHLayer = N_CHECK(net->addSlice( + *concatenatedBias, Dims3{0, 0, 2 * hiddenSize}, Dims3{numDirections, 1, hiddenSize}, Dims3{1, 1, 1})); biasH = N_CHECK(biasHLayer->getOutput(0)); LOG_VERBOSE("Bias for H gate shape is: " << biasH->getDimensions()); auto recurrenceBiasZRLayer = N_CHECK(net->addSlice(*concatenatedBias, Dims3{0, 0, NUM_GATES * hiddenSize}, - Dims3{numDirections, 1, 2 * hiddenSize}, Dims3{1, 1, 1})); + Dims3{numDirections, 1, 2 * hiddenSize}, Dims3{1, 1, 1})); recurrenceBiasZR = N_CHECK(recurrenceBiasZRLayer->getOutput(0)); LOG_VERBOSE("Recurrence bias for ZR gates shape is: " << recurrenceBiasZR->getDimensions()); auto recurrenceBiasHLayer = N_CHECK(net->addSlice(*concatenatedBias, Dims3{0, 0, (NUM_GATES + 2) * hiddenSize}, - Dims3{numDirections, 1, hiddenSize}, Dims3{1, 1, 1})); + Dims3{numDirections, 1, hiddenSize}, Dims3{1, 1, 1})); recurrenceBiasH = N_CHECK(recurrenceBiasHLayer->getOutput(0)); LOG_VERBOSE("Recurrence bias for H gate shape is: " << recurrenceBiasH->getDimensions()); } @@ -2480,7 +2490,8 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) auto const isolateGate = [&ctx, &hiddenSize, &gateOutputShape, &net]( nvinfer1::ITensor* gates, int32_t gateIndex) -> nvinfer1::ITensor* { - nvinfer1::ISliceLayer* isolateGate = N_CHECK(net->addSlice(*gates, Dims3{0, 0, 0}, Dims3{0, 0, 0}, Dims3{1, 1, 1})); + nvinfer1::ISliceLayer* isolateGate + = N_CHECK(net->addSlice(*gates, Dims3{0, 0, 0}, Dims3{0, 0, 0}, Dims3{1, 1, 1})); isolateGate->setInput(1, *addConstant(ctx, std::vector{0, 0, gateIndex * hiddenSize}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, Dims{1, {3}}) @@ -2521,7 +2532,8 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) actInput = getElementWiseResult(ctx, *actInput, *secondSum, eOp::kSUM); } - nvinfer1::IActivationLayer* htLayer = N_CHECK(net->addActivation(*addClip(ctx, actInput, clip), activations.at(1))); + nvinfer1::IActivationLayer* htLayer + = N_CHECK(net->addActivation(*addClip(ctx, actInput, clip), activations.at(1))); htLayer->setAlpha(activationAlphas.at(1)); htLayer->setBeta(activationBetas.at(1)); ht = N_CHECK(htLayer->getOutput(0)); @@ -2547,7 +2559,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) } nvinfer1::IActivationLayer* htLayer = N_CHECK(net->addActivation( *addClip(ctx, getElementWiseResult(ctx, *xtWTH, *rtHtRhRbh, eOp::kSUM), clip), activations.at(1))); - ASSERT_NODE(htLayer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(htLayer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); 
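// For reference: this branch follows the ONNX GRU hidden-gate equation in its
// linear_before_reset form,
//
//     ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh)
//
// where xtWTH holds the input contribution and rtHtRhRbh the reset-gated recurrent
// contribution; g() is the activation layer (htLayer) created just above.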
htLayer->setAlpha(activationAlphas.at(1)); htLayer->setBeta(activationBetas.at(1)); ht = htLayer->getOutput(0); @@ -2558,7 +2570,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) // Constant `1` needs to be the same type as the inputs, either FP16 or FP32. auto* constOne = zt->getType() == nvinfer1::DataType::kHALF ? N_CHECK(addConstantScalar( - ctx, static_cast(1), ::ONNX_NAMESPACE::TensorProto::FLOAT16, Dims3{1, 1, 1}) + ctx, static_cast(1), ::ONNX_NAMESPACE::TensorProto::FLOAT16, Dims3{1, 1, 1}) ->getOutput(0)) : N_CHECK(addConstantScalar(ctx, 1.f, ::ONNX_NAMESPACE::TensorProto::FLOAT, Dims3{1, 1, 1})->getOutput(0)); nvinfer1::ITensor* Ht = getElementWiseResult(ctx, @@ -2566,12 +2578,11 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) *getElementWiseResult(ctx, *zt, *Ht1Output, eOp::kPROD), eOp::kSUM); // singlePassShape = (1, batchSize, hiddenSize) - nvinfer1::ITensor* singlePassShape - = getElementWiseResult(ctx, *gateOutputShape, - *addConstant(ctx, std::vector{numDirections, 1, 1}, - ::ONNX_NAMESPACE::TensorProto_DataType_INT32, nvinfer1::Dims{1, {3}}) - ->getOutput(0), - nvinfer1::ElementWiseOperation::kDIV); + nvinfer1::ITensor* singlePassShape = getElementWiseResult(ctx, *gateOutputShape, + *addConstant(ctx, std::vector{numDirections, 1, 1}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, + nvinfer1::Dims{1, {3}}) + ->getOutput(0), + nvinfer1::ElementWiseOperation::kDIV); if (inputs.size() > 4 && inputs.at(4)) { nvinfer1::ITensor* seqLens = &convertToTensor(inputs.at(4), ctx); @@ -2616,13 +2627,13 @@ DEFINE_BUILTIN_OP_IMPORTER(HammingWindow) OnnxAttrs attrs(node, ctx); int32_t outputDtype = attrs.get("output_datatype", 1); int32_t periodic = attrs.get("periodic", 1); - ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); constexpr float alpha = 25.F / 46.F; constexpr float beta = 1.F - alpha; auto* N = &convertToTensor(inputs.at(0), ctx); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); auto* window = generateWindow(ctx, N); @@ -2644,8 +2655,8 @@ DEFINE_BUILTIN_OP_IMPORTER(HammingWindow) DEFINE_BUILTIN_OP_IMPORTER(HannWindow) { - /*** - + /*** + Operation returns a window vector, where: Y[n] = sin^2(pi*n / N) @@ -2661,10 +2672,10 @@ DEFINE_BUILTIN_OP_IMPORTER(HannWindow) OnnxAttrs attrs(node, ctx); int32_t outputDtype = attrs.get("output_datatype", 1); int32_t periodic = attrs.get("periodic", 1); - ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); auto* N = &convertToTensor(inputs.at(0), ctx); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); auto* window = generateWindow(ctx, N); @@ -2678,13 +2689,13 @@ DEFINE_BUILTIN_OP_IMPORTER(HannWindow) DEFINE_BUILTIN_OP_IMPORTER(Hardmax) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"INT64", "INT32", "INT8", "UINT8", "BOOL"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"INT64", "INT32", "INT8", "UINT8", "BOOL"}, node, nodeIdx); OnnxAttrs attrs(node, ctx); nvinfer1::ITensor* values = &convertToTensor(inputs.at(0), ctx); auto originalDims = values->getDimensions(); int32_t axis = attrs.get("axis", ctx->getOpsetVersion() < 13 ? 
1 : -1); - CHECK_STATUS(convertAxis(axis, originalDims.nbDims, node, nodeIdx)); + convertAxis(axis, originalDims.nbDims, node, nodeIdx); if (ctx->getOpsetVersion() < 13) { // Reshape into 2D tensor @@ -2696,9 +2707,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Hardmax) auto* topKLayer = N_CHECK(ctx->network()->addTopK(*values, nvinfer1::TopKOperation::kMAX, /* k */ 1, axisMask)); auto* squeezedIndices = squeezeTensor(ctx, node, *topKLayer->getOutput(1), {axis}); - auto* zeroOneTensor = N_CHECK(addConstant(ctx, std::vector{0, 1}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, - nvinfer1::Dims{ - 1, {2}})->getOutput(0)); + auto* zeroOneTensor = N_CHECK(addConstant(ctx, std::vector{0, 1}, + ::ONNX_NAMESPACE::TensorProto_DataType_INT32, + nvinfer1::Dims{1, {2}})->getOutput(0)); auto* depth = getAxisLength(ctx, values, axis, nvinfer1::Dims{0}); auto* oneHotLayer = N_CHECK(ctx->network()->addOneHot(*squeezedIndices, *zeroOneTensor, *depth, axis)); auto* oneHotOutput = N_CHECK(oneHotLayer->getOutput(0)); @@ -2735,7 +2746,7 @@ DEFINE_BUILTIN_OP_IMPORTER(If) ::ONNX_NAMESPACE::GraphProto const& elseGraph = attrs.get<::ONNX_NAMESPACE::GraphProto const&>("else_branch"); // Number of outputs are the same between the two branches. - ASSERT_NODE(thenGraph.output_size() == elseGraph.output_size(), + ONNXTRT_CHECK_NODE(thenGraph.output_size() == elseGraph.output_size(), "then/else subgraphs should have the same number of outputs: then outputs = " << thenGraph.output_size() << ", else outputs = " << elseGraph.output_size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -2753,7 +2764,7 @@ DEFINE_BUILTIN_OP_IMPORTER(If) NameScope nameScope(*ctx); std::vector errors{}; - CHECK_STATUS(onnx2trt::parseGraph(ctx, body, errors)); + onnx2trt::parseGraph(ctx, body, errors); for (int32_t i = 0; i < nbOutputs; i++) { graphOutputs.emplace_back(ctx->tensors().at(body.output(i).name())); @@ -2767,7 +2778,8 @@ DEFINE_BUILTIN_OP_IMPORTER(If) // The `condition` tensor must be a scalar boolean. auto* condTensor = convertToScalar(ctx, &convertToTensor(cond, ctx)); - ASSERT_NODE(condTensor, "Failed to convert the input cond to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + condTensor, "Failed to convert the input cond to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto conditional = N_CHECK(ctx->network()->addIfConditional()); conditional->setName(getNodeName(node).c_str()); @@ -2776,15 +2788,15 @@ DEFINE_BUILTIN_OP_IMPORTER(If) std::vector thenLayers, elseLayers; std::vector thenSubgraphTensors; std::vector elseSubgraphTensors; - CHECK_STATUS(importSubgraph(ctx, thenGraph, thenLayers, thenSubgraphTensors)); - CHECK_STATUS(importSubgraph(ctx, elseGraph, elseLayers, elseSubgraphTensors)); + importSubgraph(ctx, thenGraph, thenLayers, thenSubgraphTensors); + importSubgraph(ctx, elseGraph, elseLayers, elseSubgraphTensors); using InputsMap = std::unordered_map; InputsMap inputsMap; - CHECK_STATUS(addIfInputLayers(ctx, conditional, inputsMap, thenLayers)); - CHECK_STATUS(addIfInputLayers(ctx, conditional, inputsMap, elseLayers)); + addIfInputLayers(ctx, conditional, inputsMap, thenLayers); + addIfInputLayers(ctx, conditional, inputsMap, elseLayers); - ASSERT_NODE(thenSubgraphTensors.size() == elseSubgraphTensors.size(), + ONNXTRT_CHECK_NODE(thenSubgraphTensors.size() == elseSubgraphTensors.size(), "Found different number of output tensors in If conditional subgraphs! 
then outputs = " << thenSubgraphTensors.size() << ", else outputs = " << elseSubgraphTensors.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -2828,11 +2840,11 @@ DEFINE_BUILTIN_OP_IMPORTER(InstanceNormalization) auto scaleDataType = inputs.at(1).getDataType(); auto biasDataType = inputs.at(2).getDataType(); - ASSERT_NODE((inputDataType == DataType::kFLOAT || inputDataType == DataType::kHALF), + ONNXTRT_CHECK_NODE((inputDataType == DataType::kFLOAT || inputDataType == DataType::kHALF), "Inputs must be either FLOAT or FLOAT16. Input type is " + getTrtDtypeName(inputDataType) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputDataType == scaleDataType && scaleDataType == biasDataType), + ONNXTRT_CHECK_NODE((inputDataType == scaleDataType && scaleDataType == biasDataType), "Inputs must be either all FLOAT or all FLOAT16. Input type = " + getTrtDtypeName(inputDataType) + ", scale type = " + getTrtDtypeName(scaleDataType) + ", bias type = " + getTrtDtypeName(biasDataType) + ".", @@ -2867,7 +2879,8 @@ DEFINE_BUILTIN_OP_IMPORTER(IsInf) if (detectNegative) { - auto* isNegLayer = N_CHECK(ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kLESS)); + auto* isNegLayer + = N_CHECK(ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kLESS)); auto* isNeg = N_CHECK(isNegLayer->getOutput(0)); auto* isInfLayer = N_CHECK(ctx->network()->addUnary(input, nvinfer1::UnaryOperation::kISINF)); auto* isInf = N_CHECK(isInfLayer->getOutput(0)); @@ -2876,7 +2889,8 @@ DEFINE_BUILTIN_OP_IMPORTER(IsInf) } if (detectPositive) { - auto* isPosLayer = N_CHECK(ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kGREATER)); + auto* isPosLayer + = N_CHECK(ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kGREATER)); auto* isPos = N_CHECK(isPosLayer->getOutput(0)); auto* isInfLayer = N_CHECK(ctx->network()->addUnary(input, nvinfer1::UnaryOperation::kISINF)); auto* isInf = N_CHECK(isInfLayer->getOutput(0)); @@ -2932,7 +2946,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LayerNormalization) convertDtype(attrs.get("stash_type", 1), &computeType); int32_t const nbDims = input->getDimensions().nbDims; - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); uint32_t axesMask{0}; // Populate axesMask with axis values @@ -2942,8 +2956,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LayerNormalization) } // Broadcast scale and bias to input size - CHECK_STATUS(broadcastTensors(ctx, input, scale)); - CHECK_STATUS(broadcastTensors(ctx, input, bias)); + broadcastTensors(ctx, input, scale); + broadcastTensors(ctx, input, bias); auto* layer = N_CHECK(ctx->network()->addNormalization(*input, *scale, *bias, axesMask)); layer->setEpsilon(epsilon); @@ -2995,7 +3009,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) constexpr int32_t NB_DISCARDED_OUTPUTS = 1; // First output is the updated value of the condition, and is ignored by the outer loop node. constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1024; - ASSERT_NODE((inputs.size() >= 2), + ONNXTRT_CHECK_NODE((inputs.size() >= 2), "The Loop operator requires at least 2 inputs. 
The current number of inputs = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); @@ -3024,7 +3038,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) { tripLimit = convertToScalar(ctx, &convertToTensor(inputs[0], ctx)); tripLimit = castHelper(ctx, tripLimit, DataType::kINT32); - ASSERT_NODE(tripLimit, "Failed to convert the trip-count input to a scalar.", node, nodeIdx, + ONNXTRT_CHECK_NODE(tripLimit, "Failed to convert the trip-count input to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); ctx->loopTensors()[body.input(0).name()] = node.input(0); loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT); @@ -3037,7 +3051,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) if (inputs[1]) { cond = convertToScalar(ctx, &convertToTensor(inputs[1], ctx)); - ASSERT_NODE(cond, "Failed to convert the input cond to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + cond, "Failed to convert the input cond to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); ctx->loopTensors()[body.input(1).name()] = node.input(1); ctx->registerTensor(cond, body.input(1).name()); } @@ -3053,7 +3068,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) // Loop body std::vector errors{}; - CHECK_STATUS(onnx2trt::parseGraph(ctx, body, errors)); + onnx2trt::parseGraph(ctx, body, errors); if (cond) { @@ -3092,7 +3107,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) auto& scanOutput = convertToTensor(ctx->tensors().at(bodyOutputName), ctx); LOG_VERBOSE("For scan output: " << bodyOutputName << ", found matching tensor: " << scanOutput.getName() << ", with shape: " << scanOutput.getDimensions()); - nvinfer1::ILoopOutputLayer* trtScanOut = N_CHECK(loop->addLoopOutput(scanOutput, nvinfer1::LoopOutput::kCONCATENATE, 0)); + nvinfer1::ILoopOutputLayer* trtScanOut + = N_CHECK(loop->addLoopOutput(scanOutput, nvinfer1::LoopOutput::kCONCATENATE, 0)); // If trip limit is set, we can set the loop output to the tripLimit, otherwise, set to some dummy constant // value. // In the latter case, the scan outputs must not be used in the rest of the model. 
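(Aside for readers of this hunk: the ILoop wiring the importer relies on can be seen in isolation below. This is a minimal sketch, not parser code; `network`, `tripCount` (a 0D INT32 tensor), `initialValue`, and `step` are assumed to be prebuilt. It computes v(t) = v(t-1) + step, with the per-iteration values stacked into a scan output.)

    // Count-bound loop: one recurrence, one kLAST_VALUE output, one scan output.
    nvinfer1::ILoop* loop = network->addLoop();
    loop->addTripLimit(*tripCount, nvinfer1::TripLimit::kCOUNT);

    nvinfer1::IRecurrenceLayer* rec = loop->addRecurrence(*initialValue);
    nvinfer1::ITensor* next = network
        ->addElementWise(*rec->getOutput(0), *step, nvinfer1::ElementWiseOperation::kSUM)
        ->getOutput(0);
    rec->setInput(1, *next); // feed the updated value back into the recurrence

    // Final state value, i.e. an ONNX Loop state output.
    nvinfer1::ILoopOutputLayer* lastValue
        = loop->addLoopOutput(*rec->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE);

    // Scan output: v(t) concatenated across iterations on axis 0. With a kCOUNT
    // trip limit the concatenation length can simply be the trip count; without
    // one, the importer above falls back to DUMMY_SCAN_OUTPUT_LENGTH.
    nvinfer1::ILoopOutputLayer* scan
        = loop->addLoopOutput(*next, nvinfer1::LoopOutput::kCONCATENATE, 0);
    scan->setInput(1, *tripCount);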
@@ -3104,7 +3120,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) { trtScanOut->setInput(1, *N_CHECK(addConstantScalar(ctx, DUMMY_SCAN_OUTPUT_LENGTH, ::ONNX_NAMESPACE::TensorProto_DataType_INT32) - ->getOutput(0))); + ->getOutput(0))); } nodeOutputs.emplace_back(N_CHECK(trtScanOut->getOutput(0))); } @@ -3168,9 +3184,9 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) reshapeBias->setZeroIsPlaceholder(false); auto* reshapeBiasOut = N_CHECK(reshapeBias->getOutput(0)); LOG_VERBOSE("Reshaping bias to: " << reshapeBiasOut->getDimensions()); - auto reduceLayer = N_CHECK(ctx->network() - ->addReduce(*reshapeBiasOut, nvinfer1::ReduceOperation::kSUM, /*axis=*/0b010, - /*keepDimensions=*/true)); + auto reduceLayer + = N_CHECK(ctx->network()->addReduce(*reshapeBiasOut, nvinfer1::ReduceOperation::kSUM, /*axis=*/0b010, + /*keepDimensions=*/true)); combinedBias = N_CHECK(reduceLayer->getOutput(0)); LOG_VERBOSE("After reduction, bias shape is: " << combinedBias->getDimensions()); } @@ -3221,7 +3237,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) // Add X(t) nvinfer1::ITensor* iterationInput = addRNNInput(ctx, node, loop, inputs, direction); - ASSERT_NODE(iterationInput, "Failed to add RNN input.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(iterationInput, "Failed to add RNN input.", node, nodeIdx, ErrorCode::kINVALID_NODE); // H(t-1) nvinfer1::IRecurrenceLayer* Ht1 = N_CHECK(loop->addRecurrence(*initialHidden)); @@ -3243,8 +3259,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) LOG_VERBOSE("X(t) * W^T -> " << xtWT->getDimensions()); nvinfer1::ITensor* ht1RT = ctx->network() - ->addMatrixMultiply(*Ht1Output, nvinfer1::MatrixOperation::kNONE, - *recurrenceWeights, nvinfer1::MatrixOperation::kTRANSPOSE) + ->addMatrixMultiply(*Ht1Output, nvinfer1::MatrixOperation::kNONE, *recurrenceWeights, + nvinfer1::MatrixOperation::kTRANSPOSE) ->getOutput(0); LOG_VERBOSE("H(t-1) * R^T -> " << ht1RT->getDimensions()); @@ -3259,7 +3275,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) auto const isolateGate = [&ctx, &hiddenSize, &gateOutputShape](nvinfer1::ITensor* gates, int32_t gateIndex) -> nvinfer1::ITensor* { nvinfer1::ISliceLayer* isolate = N_CHECK(ctx->network()->addSlice( - *gates, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1}));; + *gates, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{1, 1, 1})); + ; isolate->setInput(1, *addConstant(ctx, std::vector{0, 0, gateIndex * hiddenSize}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, nvinfer1::Dims{1, {3}}) @@ -3283,7 +3300,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) auto* peepholeWeights = unsqueezeTensor(ctx, node, *isolatePeephole->getOutput(0), std::vector{1}); LOG_VERBOSE("Peephole weight for gate: " << gateIndex << " shape: " << peepholeWeights->getDimensions()); - return getElementWiseResult(ctx, *gate, *getElementWiseResult(ctx, *peepholeWeights, *cellState, eOp::kPROD), eOp::kSUM); + return getElementWiseResult( + ctx, *gate, *getElementWiseResult(ctx, *peepholeWeights, *cellState, eOp::kPROD), eOp::kSUM); }; // NOTE: . 
represents a Hadamard product @@ -3329,12 +3347,11 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) nvinfer1::ITensor* operandFC = getElementWiseResult(ctx, *ftGate, *Ct1Output, eOp::kPROD); nvinfer1::ITensor* Ct = getElementWiseResult(ctx, *operandFC, *operandIC, eOp::kSUM); - nvinfer1::ITensor* singlePassShape - = getElementWiseResult(ctx, *gateOutputShape, - *addConstant(ctx, std::vector{numDirections, 1, 1}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, - nvinfer1::Dims{1, {3}}) - ->getOutput(0), - eOp::kDIV); + nvinfer1::ITensor* singlePassShape = getElementWiseResult(ctx, *gateOutputShape, + *addConstant(ctx, std::vector{numDirections, 1, 1}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, + nvinfer1::Dims{1, {3}}) + ->getOutput(0), + eOp::kDIV); if (inputs.size() > 4 && inputs.at(4)) { @@ -3363,7 +3380,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) otGate = N_CHECK(otGateAct->getOutput(0)); // H(t) = o(t) . h(C(t)) - nvinfer1::IActivationLayer* hAct = N_CHECK(ctx->network()->addActivation(*addClip(ctx, Ct, clip), activations.at(2))); + nvinfer1::IActivationLayer* hAct + = N_CHECK(ctx->network()->addActivation(*addClip(ctx, Ct, clip), activations.at(2))); hAct->setAlpha(activationAlphas.at(2)); hAct->setBeta(activationBetas.at(2)); auto hActTensor = N_CHECK(hAct->getOutput(0)); @@ -3409,16 +3427,16 @@ DEFINE_BUILTIN_OP_IMPORTER(LpNormalization) int32_t p = attrs.get("p", 2); int32_t nbDims = input->getDimensions().nbDims; DataType dt = input->getType(); - ASSERT_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF), + ONNXTRT_CHECK_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF), "Only float inputs/outputs supported in LpNormalization. The current data type = " + getTrtDtypeName(dt) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); nvinfer1::ITensor* norm{nullptr}; TensorOrWeights zeros = ctx->createNamedTempWeights(trtDataTypeToONNX(inputType), {0, {}}); nvinfer1::ITensor* zerosTensor = &convertToTensor(zeros, ctx); - CHECK_STATUS(broadcastTensor(ctx, zerosTensor, nbDims)); + broadcastTensor(ctx, zerosTensor, nbDims); if (p == 1) { @@ -3480,7 +3498,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) int32_t nbSpatialDims = attrs.get("kernel_shape").nbDims; DataType dt = input->getType(); - ASSERT_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF), + ONNXTRT_CHECK_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF), "Only float inputs/outputs supported in LpPool. 
The current data type = " + getTrtDtypeName(dt) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -3491,8 +3509,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) nvinfer1::PaddingMode paddingMode; bool excludePadding(false); bool ceilMode = static_cast(attrs.get("ceil_mode", 0)); - CHECK_STATUS(getKernelParams(ctx, node, &kernelShape, &strides, &begPadding, &endPadding, paddingMode, - excludePadding, nullptr, nullptr, ceilMode)); + getKernelParams(ctx, node, &kernelShape, &strides, &begPadding, &endPadding, paddingMode, excludePadding, nullptr, + nullptr, ceilMode); nvinfer1::Dims scalarDims = makeDims(nbDims, 1); float kernelSz{1.0F}; @@ -3539,7 +3557,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) output = N_CHECK(poolLayer->getOutput(0)); // pool_sum = pool_avg(x')*kernel_size - auto* correctedSumLayer = N_CHECK(ctx->network()->addElementWise(*output, *kernelSzLayer->getOutput(0), eOp::kPROD)); + auto* correctedSumLayer + = N_CHECK(ctx->network()->addElementWise(*output, *kernelSzLayer->getOutput(0), eOp::kPROD)); ctx->registerLayer(correctedSumLayer, node); output = correctedSumLayer->getOutput(0); @@ -3556,7 +3575,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) DEFINE_BUILTIN_OP_IMPORTER(MatMul) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"INT32", "INT64"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"INT32", "INT64"}, node, nodeIdx); nvinfer1::ITensor* inputA = &convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* inputB = &convertToTensor(inputs.at(1), ctx); @@ -3577,7 +3596,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MatMul) // This is done in broadcast extra dimensions. needSqueezeHead = true; } - CHECK_STATUS(broadcastTensors(ctx, inputA, inputB)); + broadcastTensors(ctx, inputA, inputB); auto const getMatrixOp = [](nvinfer1::ITensor const& input) { return (input.getDimensions().nbDims == 1) ? nvinfer1::MatrixOperation::kVECTOR @@ -3619,8 +3638,8 @@ DEFINE_BUILTIN_OP_IMPORTER(MaxPool) DEFINE_BUILTIN_OP_IMPORTER(Mean) { - std::vector sumResult; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, inputs, nvinfer1::ElementWiseOperation::kSUM), &sumResult); + std::vector sumResult + = elementwiseHelper(ctx, node, nodeIdx, inputs, nvinfer1::ElementWiseOperation::kSUM); auto sum_input = sumResult.at(0); nvinfer1::ITensor& sum_tensor = convertToTensor(sum_input, ctx); @@ -3654,7 +3673,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MeanVarianceNormalization) auto const dims = input->getDimensions(); DataType const dt = input->getType(); - ASSERT_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF || dt == DataType::kBF16), + ONNXTRT_CHECK_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF || dt == DataType::kBF16), "Only float32/float16/bfloat16 inputs/outputs supported in MeanVarianceNormalization. 
The current data type = " + getTrtDtypeName(dt) + ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DATATYPE); @@ -3666,7 +3685,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MeanVarianceNormalization) for (int32_t axis : axes) { - CHECK_STATUS(convertAxis(axis, dims.nbDims, node, nodeIdx)); + convertAxis(axis, dims.nbDims, node, nodeIdx); axesMask |= 1 << axis; } @@ -3722,18 +3741,18 @@ DEFINE_BUILTIN_OP_IMPORTER(Mul) DEFINE_BUILTIN_OP_IMPORTER(Mod) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); using eOp = nvinfer1::ElementWiseOperation; OnnxAttrs attrs(node, ctx); int32_t const fmod = attrs.get("fmod", 0); nvinfer1::ITensor* input0 = &convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* input1 = &convertToTensor(inputs.at(1), ctx); - CHECK_STATUS(broadcastTensors(ctx, input0, input1)); + broadcastTensors(ctx, input0, input1); if (fmod == 0) { // fmod = 0, inputs can only be integers - ASSERT_NODE((input0->getType() == DataType::kINT32 || input0->getType() == DataType::kINT64), + ONNXTRT_CHECK_NODE((input0->getType() == DataType::kINT32 || input0->getType() == DataType::kINT64), "The fmod attribute is set to 0. Inputs cannot be of floating point types. The current input type is " + getTrtDtypeName(input0->getType()) + ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DATATYPE); @@ -3755,21 +3774,18 @@ DEFINE_BUILTIN_OP_IMPORTER(Mod) else { // Calculate input0 / input1 - std::vector divResult; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {input0, input1}, eOp::kDIV), &divResult); + std::vector divResult = elementwiseHelper(ctx, node, nodeIdx, {input0, input1}, eOp::kDIV); auto* divResultTensor = &convertToTensor(divResult.at(0), ctx); // Calculate input0 - (input1 * floor(input0 / input1)) - nvinfer1::IElementWiseLayer* layerWithDivFloor - = modWithFPInputs(ctx, input0, input1, divResultTensor, true); + nvinfer1::IElementWiseLayer* layerWithDivFloor = modWithFPInputs(ctx, input0, input1, divResultTensor, true); // Calculate input0 - (input1 * ceil(input0 / input1)) - nvinfer1::IElementWiseLayer* layerWithDivCeil - = modWithFPInputs(ctx, input0, input1, divResultTensor, false); + nvinfer1::IElementWiseLayer* layerWithDivCeil = modWithFPInputs(ctx, input0, input1, divResultTensor, false); auto* zero = createZeroTensor(ctx, divResultTensor); - std::vector greaterOrEqualResult; - GET_VALUE(greaterLessOrEqual(ctx, node, nodeIdx, divResultTensor, zero, true), &greaterOrEqualResult); + std::vector greaterOrEqualResult + = greaterLessOrEqual(ctx, node, nodeIdx, divResultTensor, zero, true); auto* condition = &convertToTensor(greaterOrEqualResult.at(0), ctx); auto* outputWithDivFloor = layerWithDivFloor->getOutput(0); auto* outputWithDivCeil = layerWithDivCeil->getOutput(0); @@ -3790,19 +3806,19 @@ DEFINE_BUILTIN_OP_IMPORTER(Neg) DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) { // max_output, iou_threshold and score_threshold are optional - ASSERT_NODE(inputs.size() >= 2 && inputs.size() <= 5, + ONNXTRT_CHECK_NODE(inputs.size() >= 2 && inputs.size() <= 5, "The node requires between 2-5 inputs. The actual input size is " << inputs.size() << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Input: boxes nvinfer1::ITensor* boxesTensorPtr = &convertToTensor(inputs.at(0), ctx); - ASSERT_NODE(boxesTensorPtr->getDimensions().nbDims == 3, + ONNXTRT_CHECK_NODE(boxesTensorPtr->getDimensions().nbDims == 3, "The boxes tensor must be 3D. 
The actual rank is " << boxesTensorPtr->getDimensions().nbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Input: scores nvinfer1::ITensor* scoresTensorPtr = &convertToTensor(inputs.at(1), ctx); - ASSERT_NODE(scoresTensorPtr->getDimensions().nbDims == 3, + ONNXTRT_CHECK_NODE(scoresTensorPtr->getDimensions().nbDims == 3, "The scores tensor must be 3D. The actual rank is " << scoresTensorPtr->getDimensions().nbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -3824,15 +3840,15 @@ DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) ->addElementWise(*maxOutputBoxesPerClassTensorPtr, *int32Max, nvinfer1::ElementWiseOperation::kMIN) ->getOutput(0); maxOutputBoxesPerClassTensorPtr = castHelper(ctx, maxOutputBoxesPerClassTensorPtr, DataType::kINT32); - ASSERT_NODE(maxOutputBoxesPerClassTensorPtr != nullptr, "The max_output_boxes_per_class tensor must be 0D", - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(maxOutputBoxesPerClassTensorPtr != nullptr, + "The max_output_boxes_per_class tensor must be 0D", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } else { auto* constantLayer = N_CHECK(ctx->network()->addConstant( nvinfer1::Dims{0, {}}, nvinfer1::Weights{DataType::kINT32, &maxOutputBoxesPerClassDefault, 1})); - ASSERT_NODE(constantLayer != nullptr, "Failed to add in constant for default max_output_boxes_per_class", node, - nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(constantLayer != nullptr, "Failed to add in constant for default max_output_boxes_per_class", + node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); maxOutputBoxesPerClassTensorPtr = N_CHECK(constantLayer->getOutput(0)); } @@ -3840,7 +3856,7 @@ DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) if (inputs.size() >= 4 && !inputs.at(3).isNullTensor()) { iouThresholdTensorPtr = convertToScalar(inputs.at(3), ctx); - ASSERT_NODE(iouThresholdTensorPtr != nullptr, "The iou_threshold tensor must be 0D", node, nodeIdx, + ONNXTRT_CHECK_NODE(iouThresholdTensorPtr != nullptr, "The iou_threshold tensor must be 0D", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -3848,18 +3864,19 @@ DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) if (inputs.size() >= 5 && !inputs.at(4).isNullTensor()) { scoreThresholdTensorPtr = convertToScalar(inputs.at(4), ctx); - ASSERT_NODE(scoreThresholdTensorPtr != nullptr, "The score_threshold tensor must be 0D", node, nodeIdx, + ONNXTRT_CHECK_NODE(scoreThresholdTensorPtr != nullptr, "The score_threshold tensor must be 0D", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } // Transpose scores tensor from [batch, classes, bounding_boxes] to [batch, bounding_boxes, classes] nvinfer1::Permutation perm{0, 2, 1}; nvinfer1::ITensor* transposedScoresTensorPtr = transposeTensor(ctx, node, *scoresTensorPtr, perm); - ASSERT_NODE(transposedScoresTensorPtr, "Failed to transpose the scores input.", node, nodeIdx, + ONNXTRT_CHECK_NODE(transposedScoresTensorPtr, "Failed to transpose the scores input.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Create the NMS layer - auto* layer = N_CHECK(ctx->network()->addNMS(*boxesTensorPtr, *transposedScoresTensorPtr, *maxOutputBoxesPerClassTensorPtr)); + auto* layer = N_CHECK( + ctx->network()->addNMS(*boxesTensorPtr, *transposedScoresTensorPtr, *maxOutputBoxesPerClassTensorPtr)); ctx->registerLayer(layer, node); // Handle the optional threshold inputs @@ -3880,7 +3897,7 @@ DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) case 0: fmt = nvinfer1::BoundingBoxFormat::kCORNER_PAIRS; break; case 1: fmt = nvinfer1::BoundingBoxFormat::kCENTER_SIZES; break; 
default: - ASSERT_NODE(false, "Invalid value provided for the center_point_box attribute", node, nodeIdx, + ONNXTRT_CHECK_NODE(false, "Invalid value provided for the center_point_box attribute", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); } layer->setBoundingBoxFormat(fmt); @@ -3897,11 +3914,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Not) DEFINE_BUILTIN_OP_IMPORTER(OneHot) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); - CHECK_STATUS(notInvalidType(inputs.at(1), {"UINT8"}, node, nodeIdx)); - CHECK_STATUS(notInvalidType(inputs.at(2), {"UINT8"}, node, nodeIdx)); - ASSERT_NODE(node.input_size(), "OneHot must have exactly 3 inputs. Number of inputs = " << node.input_size() << ".", - node, nodeIdx, ErrorCode::kINVALID_NODE); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); + checkNotInvalidType(inputs.at(1), {"UINT8"}, node, nodeIdx); + checkNotInvalidType(inputs.at(2), {"UINT8"}, node, nodeIdx); + ONNXTRT_CHECK_NODE(node.input_size(), + "OneHot must have exactly 3 inputs. Number of inputs = " << node.input_size() << ".", node, nodeIdx, + ErrorCode::kINVALID_NODE); nvinfer1::ITensor* values = &convertToTensor(inputs.at(2), ctx); @@ -3916,12 +3934,12 @@ DEFINE_BUILTIN_OP_IMPORTER(OneHot) depth = castHelper(ctx, depth, DataType::kINT32); } depth = convertToScalar(ctx, depth); - ASSERT_NODE(depth, "Failed to convert the depth to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(depth, "Failed to convert the depth to a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); auto axis = attrs.get("axis", -1); auto nbDims = indices->getDimensions().nbDims; - CHECK_STATUS(convertAxis(axis, nbDims + 1, node, nodeIdx)); + convertAxis(axis, nbDims + 1, node, nodeIdx); auto* layer = N_CHECK(ctx->network()->addOneHot(*indices, *values, *depth, axis)); ctx->registerLayer(layer, node); @@ -3935,7 +3953,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Or) DEFINE_BUILTIN_OP_IMPORTER(Pad) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); int32_t const nbDims = tensorPtr->getDimensions().nbDims; ShapeTensor const tensorDims = shapeOf(*tensorPtr); @@ -3962,7 +3980,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) // In opset >= 11, padding indices and values moved from attributes to inputs if (inputs.at(1).is_weights()) { - CHECK_STATUS(weightsToVector(inputs.at(1).weights(), &onnxPadding)); + weightsToVector(inputs.at(1).weights(), &onnxPadding); } if (inputs.size() >= 3 && !inputs.at(2).isNullTensor()) { @@ -3970,8 +3988,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) if (inputs.at(2).is_weights()) { auto const padWeight = inputs.at(2).weights(); - ASSERT_NODE((padWeight.count() == 1), "The input constant_value is required to be a scalar.", node, - nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((padWeight.count() == 1), "The input constant_value is required to be a scalar.", + node, nodeIdx, ErrorCode::kINVALID_NODE); switch (padWeight.type) { case ::ONNX_NAMESPACE::TensorProto::FLOAT: @@ -4015,7 +4033,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) } // Sanity check. 
- ASSERT_NODE(static_cast(onnxPadding.size()) == padAxesSize * 2, + ONNXTRT_CHECK_NODE(static_cast(onnxPadding.size()) == padAxesSize * 2, "Length of pads input must be twice the length of axes input.", node, nodeIdx, ErrorCode::kINVALID_NODE); std::vector beginPadsVec(onnxPadding.begin(), onnxPadding.begin() + padAxesSize); @@ -4026,11 +4044,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) else { nvinfer1::ITensor* onnxPaddingPtr = &convertToTensor(inputs.at(1), ctx); - ASSERT_NODE((onnxPaddingPtr->getDimensions().nbDims == 1), + ONNXTRT_CHECK_NODE((onnxPaddingPtr->getDimensions().nbDims == 1), "The padding input must be 1D. The rank of padding input = " << onnxPaddingPtr->getDimensions().nbDims << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - ASSERT_NODE(onnxPaddingPtr->getDimensions().d[0] == padAxesSize * 2, + ONNXTRT_CHECK_NODE(onnxPaddingPtr->getDimensions().d[0] == padAxesSize * 2, "pads should be twice the length of input axes i.e. " << 2 * padAxesSize << ", actual length is: " << onnxPaddingPtr->getDimensions().d[0], node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -4053,7 +4071,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) if (padAxes.allValuesKnown()) { // gather() requires indices to be normalized if their values are known - CHECK_STATUS(normalizeAxes(padAxes, nbDims)); + normalizeAxes(padAxes, nbDims); } auto axesDims = gather(ctx, tensorDims, padAxes); ShapeTensor const zeros = similar(ctx, beginPads, 0); @@ -4062,7 +4080,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) ShapeTensor const stride = similar(ctx, start, 1); auto* layer = N_CHECK(addSlice(ctx, *tensorPtr, start, size, stride)); - ASSERT_NODE(layer, "Could not create padding layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Could not create padding layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); if (padAxes.allValuesKnown()) { layer->setAxes(shapeTensorToDims(padAxes, "slice axes", -nbDims, nbDims - 1)); @@ -4104,7 +4122,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) ->getOutput(0); break; } - ASSERT_NODE( + ONNXTRT_CHECK_NODE( fillValue, "Could not create layer for constant_value", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setInput(4, *fillValue); } @@ -4123,7 +4141,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) } else { - return MAKE_ERROR("Unsupported pad mode", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_THROW(MAKE_ERROR("Unsupported pad mode", ErrorCode::kUNSUPPORTED_NODE)); } ctx->registerLayer(layer, node); @@ -4145,24 +4163,24 @@ DEFINE_BUILTIN_OP_IMPORTER(Pow) DEFINE_BUILTIN_OP_IMPORTER(PRelu) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"INT32", "INT64"}, node, nodeIdx)); - CHECK_STATUS(notInvalidType(inputs.at(1), {"INT32", "INT64"}, node, nodeIdx)); - ASSERT_NODE((inputs.size() == 2), + checkNotInvalidType(inputs.at(0), {"INT32", "INT64"}, node, nodeIdx); + checkNotInvalidType(inputs.at(1), {"INT32", "INT64"}, node, nodeIdx); + ONNXTRT_CHECK_NODE((inputs.size() == 2), "The PRelu operator requires exactly 2 inputs. 
Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* slopes = &convertToTensor(inputs.at(1), ctx); - CHECK_STATUS(broadcastTensors(ctx, input, slopes)); + broadcastTensors(ctx, input, slopes); auto* layer = N_CHECK(ctx->network()->addParametricReLU(*input, *slopes)); ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer, node, nodeIdx); } -NodeImportResult randomHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, +NodeOutputs randomHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, ShapeTensor const& inputShape, OnnxAttrs const& attrs, DataType const& inputDType, nvinfer1::FillOperation op) { auto* fillLayer = addFill(ctx, inputShape, op); - ASSERT_NODE(fillLayer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(fillLayer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); ctx->registerLayer(fillLayer, node); bool const isUniform = op == nvinfer1::FillOperation::kRANDOM_UNIFORM; @@ -4177,7 +4195,7 @@ NodeImportResult randomHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto { case ::ONNX_NAMESPACE::TensorProto::FLOAT: fillLayer->setToType(DataType::kFLOAT); break; case ::ONNX_NAMESPACE::TensorProto::FLOAT16: fillLayer->setToType(DataType::kHALF); break; - default: return MAKE_ERROR("Unsupported data type", ErrorCode::kINVALID_VALUE); + default: ONNXTRT_THROW(MAKE_ERROR("Unsupported data type", ErrorCode::kINVALID_VALUE)); } } else @@ -4216,10 +4234,10 @@ DEFINE_BUILTIN_OP_IMPORTER(RandomUniform) DEFINE_BUILTIN_OP_IMPORTER(RandomUniformLike) { - ASSERT_NODE((inputs.size() == 1), + ONNXTRT_CHECK_NODE((inputs.size() == 1), "The RandomUniformLike operator requires exactly 1 input. Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputs.at(0).is_tensor()), "The input tensor cannot be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(0).is_tensor()), "The input tensor cannot be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kUNSUPPORTED_NODE); auto& input = inputs.at(0).tensor(); auto const inputShape = shapeOf(input); @@ -4241,10 +4259,10 @@ DEFINE_BUILTIN_OP_IMPORTER(RandomNormal) DEFINE_BUILTIN_OP_IMPORTER(RandomNormalLike) { - ASSERT_NODE((inputs.size() == 1), + ONNXTRT_CHECK_NODE((inputs.size() == 1), "The RandomNormalLike operator requires exactly 1 input. Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((inputs.at(0).is_tensor()), "The input tensor cannot be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(0).is_tensor()), "The input tensor cannot be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kUNSUPPORTED_NODE); auto& input = inputs.at(0).tensor(); auto const inputShape = shapeOf(input); @@ -4256,7 +4274,8 @@ DEFINE_BUILTIN_OP_IMPORTER(RandomNormalLike) DEFINE_BUILTIN_OP_IMPORTER(Range) { - ASSERT_NODE((inputs.at(0).getType() == inputs.at(1).getType(), inputs.at(0).getType() == inputs.at(2).getType()), + ONNXTRT_CHECK_NODE( + (inputs.at(0).getType() == inputs.at(1).getType() && inputs.at(0).getType() == inputs.at(2).getType()), "For the Range operator, types for start, limit, and delta must be identical. 
Type of start = " + inputs.at(0).getType() + ", type of limit = " + inputs.at(1).getType() + ", type of delta = " + inputs.at(2).getType() + ".", @@ -4265,7 +4284,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Range) bool const isInt32 = inputs.at(0).isInt32(); bool const isInt64 = inputs.at(0).isInt64(); bool const isFp32 = inputs.at(0).isFp32(); - ASSERT_NODE((isInt32 || isInt64 || isFp32), + ONNXTRT_CHECK_NODE((isInt32 || isInt64 || isFp32), "This version of TensorRT only supports int32, int64, and float input types for Range!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -4294,11 +4313,6 @@ DEFINE_BUILTIN_OP_IMPORTER(Range) delta = ShapeTensor{*input2}; } - // In reality, although the ONNX spec requires scalars the inputs may be a vector of rank 1. Squeeze here if necessary. - start = start.rank() == 1 ? convertTo0D(ctx, start) : start; - limit = limit.rank() == 1 ? convertTo0D(ctx, limit) : limit; - delta = delta.rank() == 1 ? convertTo0D(ctx, delta) : delta; - // "number_of_elements = max( ceil( (limit - start) / delta ) , 0 )" // // To implement this in TensorRT using only operations allowed on @@ -4316,7 +4330,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Range) numberOfElements = max(ctx, sub(ctx, zero, quotient), zero); nvinfer1::IFillLayer* layer = addFill(ctx, convertTo1D(ctx, numberOfElements), nvinfer1::FillOperation::kLINSPACE); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); ctx->registerLayer(layer, node); // TensorRT requires that alpha and beta both be dynamic or both be static. @@ -4329,8 +4343,10 @@ DEFINE_BUILTIN_OP_IMPORTER(Range) { // For constant int32 start and delta, we can set to layer params directly. // This might not be required if TRT-20829 is done. - ASSERT_NODE(inputs.at(0).weights().count() == 1, "Start must only be a single value!", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE(inputs.at(2).weights().count() == 1, "Delta must only be a single value!", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(inputs.at(0).weights().count() == 1, "Start must only be a single value!", node, nodeIdx, + ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(inputs.at(2).weights().count() == 1, "Delta must only be a single value!", node, nodeIdx, + ErrorCode::kINVALID_NODE); layer->setAlpha(inputs.at(0).weights().at(0)); layer->setBeta(inputs.at(2).weights().at(0)); } @@ -4364,8 +4380,8 @@ DEFINE_BUILTIN_OP_IMPORTER(ReduceL1) { RETURN_IDENTITY(inputs.at(0), node, nodeIdx); } - std::vector absResult; - GET_VALUE(unaryHelper(ctx, node, nodeIdx, inputs.at(0), nvinfer1::UnaryOperation::kABS), &absResult); + std::vector absResult + = unaryHelper(ctx, node, nodeIdx, inputs.at(0), nvinfer1::UnaryOperation::kABS); return reduceTensor(ctx, node, nodeIdx, absResult.at(0), nvinfer1::ReduceOperation::kSUM, inputs.size() >= 2 ? 
inputs.at(1) : TensorOrWeights()); @@ -4379,11 +4395,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ReduceLogSum) } auto sum_result = importReduceSum(ctx, node, nodeIdx, inputs); - if (sum_result.is_error()) - { - return sum_result; - } - TensorOrWeights sum_input = sum_result.value().at(0); + TensorOrWeights sum_input = sum_result.at(0); return unaryHelper(ctx, node, nodeIdx, sum_input, nvinfer1::UnaryOperation::kLOG); } DEFINE_BUILTIN_OP_IMPORTER(ReduceLogSumExp) @@ -4393,8 +4405,8 @@ DEFINE_BUILTIN_OP_IMPORTER(ReduceLogSumExp) RETURN_IDENTITY(inputs.at(0), node, nodeIdx); } - std::vector expResult; - GET_VALUE(unaryHelper(ctx, node, nodeIdx, inputs.at(0), nvinfer1::UnaryOperation::kEXP), &expResult); + std::vector expResult + = unaryHelper(ctx, node, nodeIdx, inputs.at(0), nvinfer1::UnaryOperation::kEXP); return importReduceLogSum(ctx, node, nodeIdx, expResult); } @@ -4407,11 +4419,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ReduceL2) } auto sum_sqr_result = importReduceSumSquare(ctx, node, nodeIdx, inputs); - if (sum_sqr_result.is_error()) - { - return sum_sqr_result; - } - TensorOrWeights sum_sqr = sum_sqr_result.value().at(0); + TensorOrWeights sum_sqr = sum_sqr_result.at(0); return unaryHelper(ctx, node, nodeIdx, sum_sqr, nvinfer1::UnaryOperation::kSQRT); } DEFINE_BUILTIN_OP_IMPORTER(ReduceMax) @@ -4470,11 +4478,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Round) DEFINE_BUILTIN_OP_IMPORTER(Resize) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx); nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); int32_t const inputRank = input.getDimensions().nbDims; ShapeTensor const inputDims = shapeOf(input); - ASSERT_NODE((inputRank > 0), "The input tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE( + (inputRank > 0), "The input tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Add resize layer nvinfer1::IResizeLayer* layer = N_CHECK(ctx->network()->addResize(input)); ctx->registerLayer(layer, node); @@ -4498,13 +4507,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) int32_t counter = 0; for (int32_t& axis : resizeAxes) { - CHECK_STATUS(convertAxis(axis, inputRank, node, nodeIdx)); + convertAxis(axis, inputRank, node, nodeIdx); isCompleteIota &= (axis == counter++); } bool const axesInterlacingNeeded = !resizeAxes.empty() && !isCompleteIota; // Note: This check is done after the conversion of axes to be in range [0, inputRank - 1] to make sure there are no // duplicates. - ASSERT_NODE(std::unordered_set(resizeAxes.begin(), resizeAxes.end()).size() == resizeAxes.size(), + ONNXTRT_CHECK_NODE(std::unordered_set(resizeAxes.begin(), resizeAxes.end()).size() == resizeAxes.size(), "The input axes must have unique elements.", node, nodeIdx, ErrorCode::kINVALID_NODE); // set transformation @@ -4556,8 +4565,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) { // NOTE: Currently `half_pixel_symmetric` and `tf_crop_and_resize` are not supported. `extrapolation_value` // attribute and `roi` input are relevant only for `tf_crop_and_resize` and hence, aren't supported. 
- ASSERT_NODE(false, "Unsupported coordinate transformation mode " << transformationMode, node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(false, "Unsupported coordinate transformation mode " << transformationMode, node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } if (transformationMode != "tf_half_pixel_for_nn") @@ -4594,14 +4603,14 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) { if (inputs.at(3).is_weights()) { - ASSERT_NODE((inputs.at(3).weights().shape.nbDims == 1), + ONNXTRT_CHECK_NODE((inputs.at(3).weights().shape.nbDims == 1), "The sizes input must be 1D. Sizes rank = " << inputs.at(3).weights().shape.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); std::vector sizesVec; weightsToVector(inputs.at(3).weights(), &sizesVec); if (axesInterlacingNeeded) { - ASSERT_NODE(sizesVec.size() == resizeAxes.size(), + ONNXTRT_CHECK_NODE(sizesVec.size() == resizeAxes.size(), "Length of sizes input must be same as length of axes attribute.", node, nodeIdx, ErrorCode::kINVALID_NODE); std::vector tempVec(inputDims.begin(), inputDims.end()); @@ -4612,7 +4621,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) } sizesVec = std::move(tempVec); } - ASSERT_NODE((static_cast(sizesVec.size()) == inputRank), + ONNXTRT_CHECK_NODE((static_cast(sizesVec.size()) == inputRank), "The shape of weights must align with input data. Length of sizes = " << sizesVec.size() << ", rank of input = " << inputRank << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -4628,7 +4637,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) auto* resizeShape = &convertToTensor(inputs.at(3), ctx); if (axesInterlacingNeeded) { - ASSERT_NODE(resizeShape->getDimensions().d[0] == static_cast(resizeAxes.size()), + ONNXTRT_CHECK_NODE(resizeShape->getDimensions().d[0] == static_cast(resizeAxes.size()), "sizes input tensor should be of the same length as axes attribute i.e. " << resizeAxes.size() << ", actual length is: " << resizeShape->getDimensions().d[0], node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -4664,7 +4673,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) if (scales.is_weights()) { // TRT-15340: Remove this and use else path when safety support nbDims == 1. - ASSERT_NODE((scales.weights().shape.nbDims == 1), + ONNXTRT_CHECK_NODE((scales.weights().shape.nbDims == 1), "The scales input must be 1D. Scales rank = " << scales.weights().shape.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); int32_t const scaleSize = scales.weights().shape.d[0]; @@ -4673,7 +4682,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) weightsToVector(scales.weights(), &scalesVec); if (axesInterlacingNeeded) { - ASSERT_NODE(scalesVec.size() == resizeAxes.size(), + ONNXTRT_CHECK_NODE(scalesVec.size() == resizeAxes.size(), "Length of scales input must be same as length of axes attribute.", node, nodeIdx, ErrorCode::kINVALID_NODE); std::vector tempVec(inputRank, 1.0); @@ -4685,7 +4694,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) // Update scalesVec to hold the ordered information. scalesVec = std::move(tempVec); } - ASSERT_NODE((static_cast(scalesVec.size()) == inputRank), + ONNXTRT_CHECK_NODE((static_cast(scalesVec.size()) == inputRank), "The shape of weights must align with input data. 
Length of scales = " << scalesVec.size() << ", rank of input = " << inputRank << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -4693,13 +4702,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) // check resize dims if (interpolationMode == nvinfer1::InterpolationMode::kLINEAR) { - ASSERT_NODE(canUseNDResize(scaleSize, scalesVec.data(), 3), + ONNXTRT_CHECK_NODE(canUseNDResize(scaleSize, scalesVec.data(), 3), "This version of TensorRT only supports linear resizing on the outermost 3 dimensions.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } else if (interpolationMode == nvinfer1::InterpolationMode::kCUBIC) { - ASSERT_NODE(canUseNDResize(scaleSize, scalesVec.data(), 2), + ONNXTRT_CHECK_NODE(canUseNDResize(scaleSize, scalesVec.data(), 2), "This version of TensorRT only supports cubic resizing on the outermost 2 dimensions.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -4709,7 +4718,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) { // Currently, interlacing of `scales` tensor with `axes` is not supported because interlacing needs a // `ShapeTensor` of scales (float values) and a `ShapeTensor` holding float values isn't supported yet. - ASSERT_NODE(!axesInterlacingNeeded, + ONNXTRT_CHECK_NODE(!axesInterlacingNeeded, "Currently, `axes` attribute is supported with `scales` tensor only when it's trivial i.e. it's an " "iota vector of same length as input rank.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -4776,33 +4785,33 @@ DEFINE_BUILTIN_OP_IMPORTER(Reshape) DEFINE_BUILTIN_OP_IMPORTER(ReverseSequence) { - ASSERT_NODE((inputs.size() == 2), + ONNXTRT_CHECK_NODE((inputs.size() == 2), "ReverseSequence expects two input tensors: input and sequence_lens. Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* sequenceLens = &convertToTensor(inputs.at(1), ctx); auto const inputDims = input->getDimensions(); auto const sequenceLensDims = sequenceLens->getDimensions(); - ASSERT_NODE((inputDims.nbDims >= 2), + ONNXTRT_CHECK_NODE((inputDims.nbDims >= 2), "Rank of input must be at least two. Current rank of inputs = " << inputDims.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((sequenceLensDims.nbDims == 1), + ONNXTRT_CHECK_NODE((sequenceLensDims.nbDims == 1), "Rank of sequence_lens must be one. 
Current rank of sequence lens = " << sequenceLensDims.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); OnnxAttrs attrs{node, ctx}; int32_t const batchAxis = attrs.get("batch_axis", 1); int32_t const sequenceAxis = attrs.get("time_axis", 0); - ASSERT_NODE((batchAxis >= 0 && batchAxis <= inputDims.nbDims), "Invalid batch_axis", node, nodeIdx, + ONNXTRT_CHECK_NODE((batchAxis >= 0 && batchAxis <= inputDims.nbDims), "Invalid batch_axis", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); - ASSERT_NODE((sequenceAxis >= 0 && sequenceAxis <= inputDims.nbDims), "Invalid time_axis", node, nodeIdx, + ONNXTRT_CHECK_NODE((sequenceAxis >= 0 && sequenceAxis <= inputDims.nbDims), "Invalid time_axis", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); auto layer = N_CHECK(ctx->network()->addReverseSequence(*input, *sequenceLens)); ctx->registerLayer(layer, node); - ASSERT_NODE(layer, "Failed to add ReverseSequence layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to add ReverseSequence layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setBatchAxis(batchAxis); layer->setSequenceAxis(sequenceAxis); @@ -4854,9 +4863,9 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) reshapeBias->setZeroIsPlaceholder(false); auto reshapeBiasOutput = N_CHECK(reshapeBias->getOutput(0)); LOG_VERBOSE("Reshaping bias to: " << reshapeBiasOutput->getDimensions()); - auto reduceLayer = N_CHECK(ctx->network() - ->addReduce(*reshapeBiasOutput, nvinfer1::ReduceOperation::kSUM, /*axis=*/0b010, - /*keepDimensions=*/true)); + auto reduceLayer + = N_CHECK(ctx->network()->addReduce(*reshapeBiasOutput, nvinfer1::ReduceOperation::kSUM, /*axis=*/0b010, + /*keepDimensions=*/true)); combinedBias = N_CHECK(reduceLayer->getOutput(0)); LOG_VERBOSE("After reduction, bias shape is: " << combinedBias->getDimensions()); } @@ -4888,7 +4897,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) } return constantOfShape(ctx, N_CHECK(addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}}) - ->getOutput(0)), + ->getOutput(0)), initialStateShape()); }; @@ -4903,7 +4912,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) // Add X(t) nvinfer1::ITensor* iterationInput = addRNNInput(ctx, node, loop, inputs, direction); - ASSERT_NODE(iterationInput, "Failed to add RNN input.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(iterationInput, "Failed to add RNN input.", node, nodeIdx, ErrorCode::kINVALID_NODE); // H(t-1) nvinfer1::IRecurrenceLayer* hiddenState = loop->addRecurrence(*initialHidden); @@ -4911,15 +4920,13 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) LOG_VERBOSE("Hidden state shape: " << hiddenState->getOutput(0)->getDimensions()); // Compute intermediate(t) = (X(t) * W^T + H(t-1) * R^T + (Wb + Rb)). 
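(Aside: written out, the recurrence the following hunk reformats is

    H_t = f\bigl(X_t W^{\top} + H_{t-1} R^{\top} + (W_b + R_b)\bigr)

where X_t is the iteration input, f is the elementwise activation applied afterwards, and, per the ONNX RNN spec, W has shape (num_directions, hidden_size, input_size) and R has shape (num_directions, hidden_size, hidden_size) — which is why both matrix multiplies below apply kTRANSPOSE to their right operand.)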
- auto xtWTLayer = N_CHECK(ctx->network() - ->addMatrixMultiply(*iterationInput, nvinfer1::MatrixOperation::kNONE, *weights, - nvinfer1::MatrixOperation::kTRANSPOSE)); + auto xtWTLayer = N_CHECK(ctx->network()->addMatrixMultiply( + *iterationInput, nvinfer1::MatrixOperation::kNONE, *weights, nvinfer1::MatrixOperation::kTRANSPOSE)); nvinfer1::ITensor* xtWT = N_CHECK(xtWTLayer->getOutput(0)); LOG_VERBOSE("X(t) * W^T -> " << xtWT->getDimensions()); - auto ht1RTLayer = N_CHECK(ctx->network() - ->addMatrixMultiply(*hiddenState->getOutput(0), nvinfer1::MatrixOperation::kNONE, - *recurrenceWeights, nvinfer1::MatrixOperation::kTRANSPOSE)); + auto ht1RTLayer = N_CHECK(ctx->network()->addMatrixMultiply(*hiddenState->getOutput(0), + nvinfer1::MatrixOperation::kNONE, *recurrenceWeights, nvinfer1::MatrixOperation::kTRANSPOSE)); nvinfer1::ITensor* ht1RT = N_CHECK(ht1RTLayer->getOutput(0)); LOG_VERBOSE("H(t-1) * R^T -> " << ht1RT->getDimensions()); @@ -4937,12 +4944,11 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) nvinfer1::ITensor* Ht = N_CHECK(hAct->getOutput(0)); // singlePassShape = (1, batchSize, hiddenSize) - nvinfer1::ITensor* singlePassShape - = getElementWiseResult(ctx, *initialStateShape(), - *N_CHECK(addConstant(ctx, std::vector{numDirections, 1, 1}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, - nvinfer1::Dims{1, {3}}) - ->getOutput(0)), - nvinfer1::ElementWiseOperation::kDIV); + nvinfer1::ITensor* singlePassShape = getElementWiseResult(ctx, *initialStateShape(), + *N_CHECK(addConstant(ctx, std::vector{numDirections, 1, 1}, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, + nvinfer1::Dims{1, + {3}})->getOutput(0)), + nvinfer1::ElementWiseOperation::kDIV); if (inputs.size() > 4 && inputs.at(4)) { @@ -4976,7 +4982,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RoiAlign) // Sanity checking auto roiDims = roisPtr->getDimensions(); - ASSERT_NODE(roiDims.nbDims == 2 && roiDims.d[1] == 4, + ONNXTRT_CHECK_NODE(roiDims.nbDims == 2 && roiDims.d[1] == 4, "Found incorrect dimensions for ROIs input! Rank of ROI input = " << roiDims.nbDims << ", roiDims.d[1] = " << roiDims.d[1] << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -5019,7 +5025,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RoiAlign) auto const plugin = createPlugin(getNodeName(node), static_cast(importPluginCreator(ctx, pluginName, pluginVersion)), f); - ASSERT_NODE(plugin != nullptr, "ROIAlign plugin was not found in the plugin registry!", node, nodeIdx, + ONNXTRT_CHECK_NODE(plugin != nullptr, "ROIAlign plugin was not found in the plugin registry!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* const inputTensorsPtr[3] = {tensorPtr, roisPtr, batchIndicesPtr}; @@ -5028,7 +5034,8 @@ DEFINE_BUILTIN_OP_IMPORTER(RoiAlign) // ROIAlign requires nvinfer_vc_plugin when using VC. #if defined(_WIN32) - ctx->addUsedVCPluginLibrary(node, pluginName.c_str(), ("nvinfer_vc_plugin_" + std::to_string(NV_TENSORRT_MAJOR)).c_str()); + ctx->addUsedVCPluginLibrary( + node, pluginName.c_str(), ("nvinfer_vc_plugin_" + std::to_string(NV_TENSORRT_MAJOR)).c_str()); #else ctx->addUsedVCPluginLibrary(node, pluginName.c_str(), "nvinfer_vc_plugin"); #endif @@ -5054,8 +5061,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) const int32_t opset8Offset = ctx->getOpsetVersion() == 8 ? 
1 : 0; if (opset8Offset == 1) { - ASSERT_NODE(inputs.at(0).isNullTensor(), "TensorRT doesn't support sequence_lens input for this node!", node, - nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(inputs.at(0).isNullTensor(), "TensorRT doesn't support sequence_lens input for this node!", + node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } int32_t const nbInputs = node.input().size() - opset8Offset; int32_t const nbScanInputs = attrs.get("num_scan_inputs"); @@ -5088,12 +5095,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) // Support possible negative axis for input and output axes: for (auto& axis : scanInputAxes) { - CHECK_STATUS(convertAxis(axis, nvinfer1::Dims::MAX_DIMS, node, nodeIdx)); + convertAxis(axis, nvinfer1::Dims::MAX_DIMS, node, nodeIdx); } for (auto& axis : scanOutputAxes) { - CHECK_STATUS(convertAxis(axis, nvinfer1::Dims::MAX_DIMS, node, nodeIdx)); + convertAxis(axis, nvinfer1::Dims::MAX_DIMS, node, nodeIdx); } auto loop = N_CHECK(ctx->network()->addLoop()); @@ -5117,7 +5124,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) for (int32_t i = 0; i < nbScanInputs; ++i) { const int32_t index = nbStateVars + i; // Scan Inputs are after the state variables. - nvinfer1::IIteratorLayer* scanInput = N_CHECK(loop->addIterator(convertToTensor(inputs.at(index + opset8Offset), ctx))); + nvinfer1::IIteratorLayer* scanInput + = N_CHECK(loop->addIterator(convertToTensor(inputs.at(index + opset8Offset), ctx))); scanInput->setAxis(scanInputAxes.at(i)); scanInput->setReverse(scanInputDirections.at(i) == 1); ctx->registerTensor(TensorOrWeights{N_CHECK(scanInput->getOutput(0))}, body.input(index).name()); @@ -5125,7 +5133,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) // Loop Body. This is handled by dispatching to other op converters. std::vector errors{}; - CHECK_STATUS(onnx2trt::parseGraph(ctx, body, errors)); + onnx2trt::parseGraph(ctx, body, errors); // Set up recurrence outputs (first N body graph outputs). std::vector nodeOutputs{}; @@ -5166,18 +5174,20 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) DEFINE_BUILTIN_OP_IMPORTER(GridSample) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx)); - CHECK_STATUS(notInvalidType(inputs.at(1), {"BOOL", "UINT8"}, node, nodeIdx)); - ASSERT_NODE( + checkNotInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx); + checkNotInvalidType(inputs.at(1), {"BOOL", "UINT8"}, node, nodeIdx); + ONNXTRT_CHECK_NODE( (inputs.size() == 2), "TRT expects two input tensors: grid and input", node, nodeIdx, ErrorCode::kINVALID_NODE); nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); int32_t const inputRank = input.getDimensions().nbDims; - ASSERT_NODE((inputRank > 0), "The input tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE( + (inputRank > 0), "The input tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor& grid = convertToTensor(inputs.at(1), ctx); int32_t const gridRank = grid.getDimensions().nbDims; - ASSERT_NODE((gridRank > 0), "The grid tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - ASSERT_NODE((gridRank == inputRank), + ONNXTRT_CHECK_NODE( + (gridRank > 0), "The grid tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE((gridRank == inputRank), "The input tensor and the grid tensor must have the same rank. 
Rank of grid tensor = " << gridRank << ", rank of input = " << inputRank << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -5219,7 +5229,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GridSample) bool const alignCorners{attrs.get("align_corners", 0) == 1}; - ASSERT_NODE( + ONNXTRT_CHECK_NODE( layer->setSampleMode(sampleMode), "Failed to set sample mode!", node, nodeIdx, ErrorCode::kINVALID_VALUE); layer->setAlignCorners(alignCorners); layer->setInterpolationMode(interpolationMode); @@ -5230,12 +5240,12 @@ DEFINE_BUILTIN_OP_IMPORTER(GridSample) DEFINE_BUILTIN_OP_IMPORTER(ScatterND) { OnnxAttrs attrs(node, ctx); - ASSERT_NODE(!attrs.count("reduction"), "Attribute reduction is not supported.", node, nodeIdx, + ONNXTRT_CHECK_NODE(!attrs.count("reduction"), "Attribute reduction is not supported.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); return addScatterLayer(ctx, node, nodeIdx, inputs, nvinfer1::ScatterMode::kND); } -NodeImportResult scatterPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, +NodeOutputs scatterPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t nodeIdx, std::vector& inputs, int32_t axis, std::string const& reduction) { // Populate scatter plugin properties. @@ -5252,8 +5262,8 @@ NodeImportResult scatterPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::Nod auto const plugin = createPlugin(getNodeName(node), static_cast(importPluginCreator(ctx, pluginName, pluginVersion)), f); - ASSERT_NODE(plugin != nullptr, "ScatterReduction plugin was not found in the plugin registry!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(plugin != nullptr, "ScatterReduction plugin was not found in the plugin registry!", node, + nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Create vector of inputs std::vector pluginInputs{}; @@ -5272,7 +5282,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ScatterElements) OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", 0); int32_t nbDims = inputs.at(0).shape().nbDims; - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); auto reduction = attrs.get("reduction", "none"); if (reduction != "none") @@ -5366,7 +5376,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Size) DEFINE_BUILTIN_OP_IMPORTER(Slice) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); int32_t const nbInputs = node.input().size(); // "...it uses this information to slice the input data tensor." nvinfer1::ITensor& data = convertToTensor(inputs.at(0), ctx); @@ -5400,11 +5410,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice) ends = ShapeTensor{*input2}; // "If axes are omitted, they are set to [0, ..., ndim-1]." axes = nbInputs > 3 ? ShapeTensor(ctx, inputs.at(3)) : iotaShapeVector(dims.size()); - ASSERT_NODE((starts.size() == axes.size()), + ONNXTRT_CHECK_NODE((starts.size() == axes.size()), "The shape of input starts misaligns with the shape of input axes. Shape of input starts = " << starts.size() << ", shape of input axes = " << axes.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE(ends.size() == axes.size(), + ONNXTRT_CHECK_NODE(ends.size() == axes.size(), "The shape of input ends misaligns with the shape of input axes. 
Shape of input ends = " << ends.size() << ", shape of input axes = " << axes.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -5425,7 +5435,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice) if (axes.allValuesKnown()) { // gather() requires indices to be normalized if their values are known - CHECK_STATUS(normalizeAxes(axes, dims.size())); + normalizeAxes(axes, dims.size()); } // Get dimensions of dims that correspond to axes for the computation of sizes auto const axesDims = gather(ctx, dims, axes); @@ -5461,10 +5471,10 @@ DEFINE_BUILTIN_OP_IMPORTER(Softmax) { auto& input = convertToTensor(inputs.at(0), ctx); auto* softmax = addSoftmax(ctx, node, nodeIdx, input); - ASSERT_NODE(softmax, "Failed to create softmax layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(softmax, "Failed to create softmax layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); // Reshape back to original shape auto* reshapeLayer = addShuffle(ctx, *softmax, shapeOf(input)); - ASSERT_NODE(reshapeLayer, "Failed to create reshape layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(reshapeLayer, "Failed to create reshape layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); RETURN_FIRST_OUTPUT(reshapeLayer, node, nodeIdx); } @@ -5480,10 +5490,10 @@ DEFINE_BUILTIN_OP_IMPORTER(Softplus) DEFINE_BUILTIN_OP_IMPORTER(SpaceToDepth) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx); // Input tensor is in NCHW format - ASSERT_NODE((inputs.at(0).shape().nbDims == 4), "The input tensor must be in the NCHW format.", node, nodeIdx, - ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((inputs.at(0).shape().nbDims == 4), "The input tensor must be in the NCHW format.", node, + nodeIdx, ErrorCode::kINVALID_NODE); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); // Extract attributes @@ -5524,7 +5534,7 @@ DEFINE_BUILTIN_OP_IMPORTER(SpaceToDepth) DEFINE_BUILTIN_OP_IMPORTER(Split) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); size_t const numOutputs = node.output().size(); // "input : T @@ -5539,7 +5549,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) // "A negative value means counting dimensions from the back. // Accepted range is [-rank, rank-1] where r = rank(input)." - CHECK_STATUS(convertAxis(axis, inputDims.size(), node, nodeIdx)); + convertAxis(axis, inputDims.size(), node, nodeIdx); std::vector tmp(inputDims.size()); std::iota(tmp.begin(), tmp.end(), 0); @@ -5576,9 +5586,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) } // In opset >= 18, a new attribute num_outputs has been added. // "Either input 'split' or the attribute 'num_outputs' should be specified, but not both." - if (ctx->getOpsetVersion() >= 18) { - ASSERT_NODE(!attrs.count("num_outputs"), - "Either 'split' should be provided as an input or 'num_outputs' should be provided as an attribute. But not both.", + if (ctx->getOpsetVersion() >= 18) + { + ONNXTRT_CHECK_NODE(!attrs.count("num_outputs"), + "Either 'split' should be provided as an input or 'num_outputs' should be provided as an " "attribute. 
But not both.", node, nodeIdx, ErrorCode::kINVALID_NODE); } } @@ -5587,7 +5599,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) { splitList = attrs.get>("split"); } - ASSERT_NODE((splitList.empty() || (splitList.size() == numOutputs)), + ONNXTRT_CHECK_NODE((splitList.empty() || (splitList.size() == numOutputs)), "The number of the split attribute misaligns with the number of outputs. Number of split attributes = " << splitList.size() << ", number of outputs = " << numOutputs << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); } else { // In opset >= 18, a new attribute 'num_outputs' has been added. - if (ctx->getOpsetVersion() >= 18 && attrs.count("num_outputs")) { - ASSERT_NODE(attrs.get("num_outputs") == static_cast(numOutputs), - "The number of node outputs is not the same as the value of 'num_outputs' attribute. num_outputs attribute value = " + if (ctx->getOpsetVersion() >= 18 && attrs.count("num_outputs")) + { + ONNXTRT_CHECK_NODE(attrs.get("num_outputs") == static_cast(numOutputs), + "The number of node outputs is not the same as the value of 'num_outputs' attribute. num_outputs " "attribute value = " << attrs.get("num_outputs") << ", number of node outputs = " << numOutputs << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); } @@ -5608,7 +5622,7 @@ // Check for invalid size. if (sizeSliceAxisLastOne.allValuesKnown()) { - ASSERT_NODE(sizeSliceAxisLastOne[0] >= 0, + ONNXTRT_CHECK_NODE(sizeSliceAxisLastOne[0] >= 0, "The last chunk size is negative, see details in https://github.com/onnx/onnx/issues/5766", node, nodeIdx, ErrorCode::kINVALID_NODE); } @@ -5679,7 +5693,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Squeeze) for (size_t i = 0; i < axesWeights.count(); i++) { int64_t axesValue = axesValues[i]; - ASSERT_NODE(axesValue >= std::numeric_limits::min() + ONNXTRT_CHECK_NODE(axesValue >= std::numeric_limits::min() && axesValue <= std::numeric_limits::max(), "Axes value truncated.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DATATYPE); axes.push_back(axesValues[i]); @@ -5700,7 +5714,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Squeeze) if (axes.size() == 0) { auto const shape = data.getDimensions(); - ASSERT_NODE(!isDynamic(shape), + ONNXTRT_CHECK_NODE(!isDynamic(shape), "Cannot infer squeeze dimensions from a dynamic shape! Please re-export your model with the Squeeze axes " "input set.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DYNAMIC); @@ -5716,18 +5730,178 @@ int32_t rank = data.getDimensions().nbDims; for (auto& axis : axes) { - CHECK_STATUS(convertAxis(axis, rank, node, nodeIdx)); + convertAxis(axis, rank, node, nodeIdx); } // "squeezed : T // Reshaped tensor with same data as input." auto* squeezed = squeezeTensor(ctx, node, data, axes, true); - ASSERT_NODE(squeezed, "Failed to squeeze tensor!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(squeezed, "Failed to squeeze tensor!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); return {{squeezed}}; } +DEFINE_BUILTIN_OP_IMPORTER(STFT) +{ + /* + STFT is implemented via decomposition into 1D convs. + + This assumes that the input signal contains non-complex values, and that all non-signal inputs are initializers. 
+ + STFT Input (Batch, SignalLength, 1) or (Batch, SignalLength) + | + | + Reshape to (Batch, 1, 1, SignalLength) + | + ____________|____________ + | | + | | + | | + Conv 1D (Real) Conv 1D (Imaginary) with weights (dftUniqueOutputs, 1, 1, FrameLength) + | | output shape is (Batch, dftUniqueOutputs, 1, Frames) + | | + |_______________________| + | + | + Concat on axis 2 - (Batch, dftUniqueOutputs, 2, Frames) + | + | + Transpose to ONNX output shape (Batch, Frames, dftUniqueOutputs, 2) + | + | + Output + */ + + OnnxAttrs attrs(node, ctx); + int64_t onesided = attrs.get("onesided", 1); + auto* input = &convertToTensor(inputs.at(0), ctx); + auto dims = input->getDimensions(); + // Signal must be composed of real-valued inputs only - if rank == 2 or rank == 3 && dims.d[2] == 1 + ONNXTRT_CHECK_NODE(dims.nbDims == 2 || (dims.nbDims == 3 && dims.d[2] == 1), + "TensorRT only supports STFT on real-valued signals!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + + // Squeeze 3D shape of (Batch, SignalLength, 1) down to 2D shape of (Batch, SignalLength) to unify future unsqueeze + // logic. + if (dims.nbDims == 3) + { + std::vector const axes{2}; + input = squeezeTensor(ctx, node, *input, axes); + } + + // Float only support. + ONNXTRT_CHECK_NODE(input->getType() == nvinfer1::DataType::kFLOAT, + "Input to STFT must be Float32. Received type: " << input->getType(), node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); + + int64_t frameStep{0}; + ShapedWeights windowWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + int64_t frameLength{0}; + + // Frame step - must be constant as this corresponds to the strides of the convolution. + ONNXTRT_CHECK_NODE( + inputs.at(1).is_weights(), "FrameStep must be an initializer!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + frameStep = static_cast(inputs.at(1).weights().values)[0]; + + // Window - optional. + if (inputs.size() >= 3 && !inputs.at(2).isNullTensor()) + { + ONNXTRT_CHECK_NODE(inputs.at(2).is_weights(), "windowWeights must be an initializer!", node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); + windowWeights = inputs.at(2).weights(); + } + // Frame length - scalar value (optional) + if (inputs.size() >= 4 && !inputs.at(3).isNullTensor()) + { + ONNXTRT_CHECK_NODE(inputs.at(3).is_weights(), "Frame length must be an initializer!", node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); + frameLength = static_cast(inputs.at(3).weights().values)[0]; + } + // If neither windowWeights nor frameLength is provided, we cannot infer the size for the Window + if (frameLength == 0 && windowWeights.values == nullptr) + { + ONNXTRT_CHECK_NODE(false, "Both frame_length and window inputs are missing for STFT!", node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); + } + + // Generate missing values if necessary. + if (frameLength == 0 && windowWeights.values != nullptr) + { + frameLength = windowWeights.shape.d[0]; + } + if (frameLength != 0 && windowWeights.values == nullptr) + { + windowWeights = ctx->createNamedTempWeights(::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, {1, {frameLength}}); + for (int64_t c = 0; c < frameLength; c++) + { + static_cast(windowWeights.values)[c] = 1.F; + } + } + + // Calculate dftUniqueBins depending on the onesided attribute. + int64_t dftUniqueBins = onesided == 1 ? ((frameLength >> 1) + 1) : frameLength; + + /* + Generate the weights for the convolutions, of shape (dftUniqueBins, 1, 1, frameLength). 
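+ With the illustrative sizes above (frameLength = 8, onesided = 1), each convolution carries weights of shape (5, 1, 1, 8).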
+ + We need to generate weights of values window[k] * e^(-2 * pi * j * w * k / n), where: + j is the imaginary number sqrt(-1) + w is the frequency for 0 <= w < dftUniqueBins + k is the index in the window, for 0 <= k < frameLength + n is equal to frameLength. + + The real and imaginary components are generated separately using Euler's formula. + + for each w in dftUniqueBins: + for each k in frameLength: + realWeights[w, 1, 1, k] = cos(-2 * pi * k * w / n) * window[k] + imagWeights[w, 1, 1, k] = sin(-2 * pi * k * w / n) * window[k] + */ + + auto realWeights = ctx->createNamedTempWeights( + ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, {4, {dftUniqueBins, 1, 1, frameLength}}); + auto imaginaryWeights = ctx->createNamedTempWeights( + ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, {4, {dftUniqueBins, 1, 1, frameLength}}); + + for (int64_t w = 0; w < static_cast(dftUniqueBins); w++) + { + for (int64_t k = 0; k < static_cast(frameLength); k++) + { + int64_t weightIndex = w * frameLength + k; + auto angle = -2.F * M_PI * w * k / frameLength; + static_cast(realWeights.values)[weightIndex] + = static_cast(cos(angle)) * static_cast(windowWeights.values)[k]; + static_cast(imaginaryWeights.values)[weightIndex] + = static_cast(sin(angle)) * static_cast(windowWeights.values)[k]; + } + } + + // Unsqueeze input to [batch, 1, 1, signalLength] + auto signalReshaped = unsqueezeTensor(ctx, node, *input, {1, 2}); + + // 1D Convolution to calculate the real part of the signal. + auto convReal = N_CHECK(ctx->network()->addConvolutionNd(*signalReshaped, dftUniqueBins, {2, {1, frameLength}}, realWeights, {})); + convReal->setStrideNd(nvinfer1::Dims{2, {1, frameStep}}); + auto* convRealOutput = N_CHECK(convReal->getOutput(0)); + + // 1D Convolution to calculate the imaginary part of the signal. + auto convImag = N_CHECK(ctx->network()->addConvolutionNd(*signalReshaped, dftUniqueBins, {2, {1, frameLength}}, imaginaryWeights, {})); + convImag->setStrideNd(nvinfer1::Dims{2, {1, frameStep}}); + auto* convImagOutput = N_CHECK(convImag->getOutput(0)); + + // Concat outputs together on axis 2, convolution outputs have shape: (Batch, dftUniqueOutputs, 1, Frames) + std::vector concatInputs{convRealOutput, convImagOutput}; + auto concatLayer = N_CHECK(ctx->network()->addConcatenation(concatInputs.data(), concatInputs.size())); + concatLayer->setAxis(2); + auto* concatOutput = N_CHECK(concatLayer->getOutput(0)); + + // Transpose to ONNX expected output shape - (Batch, dftUniqueOutputs, 2, Frames) -> (Batch, Frames, + // dftUniqueOutputs, 2) + auto transpose = N_CHECK(ctx->network()->addShuffle(*concatOutput)); + transpose->setFirstTranspose(nvinfer1::Permutation{{0, 3, 1, 2}}); + RETURN_FIRST_OUTPUT(transpose, node, nodeIdx); +} + DEFINE_BUILTIN_OP_IMPORTER(Sub) { return elementwiseHelper(ctx, node, nodeIdx, inputs, nvinfer1::ElementWiseOperation::kSUB); } @@ -5757,7 +5931,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ThresholdedRelu) DEFINE_BUILTIN_OP_IMPORTER(Tile) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); // "input : T // Input tensor of any shape." 
nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); @@ -5779,7 +5953,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Tile) DEFINE_BUILTIN_OP_IMPORTER(TopK) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", -1); @@ -5789,7 +5963,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) k = attrs.get("k"); } int32_t nbDims = tensorPtr->getDimensions().nbDims; - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); + convertAxis(axis, nbDims, node, nodeIdx); uint32_t axisMask = 1 << axis; bool needToExpandDims = (nbDims == 1); @@ -5798,7 +5972,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) // Expand spatial dims from 1D to 2D std::vector axes{1}; tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT_NODE(tensorPtr, "Failed to unsqueeze input x.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(tensorPtr, "Failed to unsqueeze input x.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } // Default is top max k. @@ -5812,10 +5986,10 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) } } nvinfer1::ITopKLayer* layer = N_CHECK(ctx->network()->addTopK(*tensorPtr, operation, k, axisMask)); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); if (ctx->getOpsetVersion() >= 10) { - ASSERT_NODE((inputs.size() == 2), + ONNXTRT_CHECK_NODE((inputs.size() == 2), "Expects two input tensors for opset >= 10: X and K. Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); nvinfer1::ITensor* kPtr = &convertToTensor(inputs.at(1), ctx); @@ -5823,7 +5997,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) layer->setInput(1, *kPtr); } ctx->registerLayer(layer, node); - ASSERT_NODE(layer, "Failed to add TopK layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to add TopK layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* values = N_CHECK(layer->getOutput(0)); nvinfer1::ITensor* indices = N_CHECK(layer->getOutput(1)); @@ -5833,9 +6007,10 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) // Un-expand spatial dims back to 1D std::vector axes{1}; values = squeezeTensor(ctx, node, *values, axes); - ASSERT_NODE(values, "Failed to squeeze the input values.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(values, "Failed to squeeze the input values.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); indices = squeezeTensor(ctx, node, *indices, axes); - ASSERT_NODE(indices, "Failed to squeeze the input indices.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE( + indices, "Failed to squeeze the input indices.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } // TensorRT doesn't support int64 for TopK indices @@ -5848,7 +6023,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Transpose) TensorOrWeights input = inputs.at(0); OnnxAttrs attrs(node, ctx); int32_t ndim = input.shape().nbDims; - ASSERT_NODE((ndim <= nvinfer1::Dims::MAX_DIMS), + ONNXTRT_CHECK_NODE((ndim <= nvinfer1::Dims::MAX_DIMS), "The rank of the input tensor exceeds the maximum supported by this version of TensorRT. 
Current rank of " "inputs = " << ndim << ", max supported rank = " << nvinfer1::Dims::MAX_DIMS << ".", @@ -5861,22 +6036,22 @@ DEFINE_BUILTIN_OP_IMPORTER(Transpose) nvinfer1::Permutation perm = attrs.get("perm", default_perm); for (int32_t i = 0; i < ndim; ++i) { - CHECK_STATUS(convertAxis(perm.order[i], ndim, node, nodeIdx)); + convertAxis(perm.order[i], ndim, node, nodeIdx); } nvinfer1::ITensor& itensor = input.is_tensor() ? input.tensor() : convertToTensor(input, ctx); nvinfer1::ITensor* output_tensor = transposeTensor(ctx, node, itensor, perm); - ASSERT_NODE(output_tensor, "Failed to transpose the input.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(output_tensor, "Failed to transpose the input.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); return {{output_tensor}}; } DEFINE_BUILTIN_OP_IMPORTER(Trilu) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"UINT8"}, node, nodeIdx); // Data Tensor using eOp = nvinfer1::ElementWiseOperation; auto* data = &convertToTensor(inputs.at(0), ctx); auto const nbDims = data->getDimensions().nbDims; - ASSERT_NODE((nbDims >= 2), + ONNXTRT_CHECK_NODE((nbDims >= 2), "Trilu input must have at least 2 dimensions! Current number of dimensions = " << nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); @@ -5903,8 +6078,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Trilu) if (inputs.size() == 2) { auto* k = &convertToTensor(inputs.at(1), ctx); - std::vector shiftResult; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {cols, k}, eOp::kSUB), &shiftResult); + std::vector shiftResult = elementwiseHelper(ctx, node, nodeIdx, {cols, k}, eOp::kSUB); cols = &convertToTensor(shiftResult.at(0), ctx); } @@ -5919,8 +6093,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Trilu) // For lower Trilus, use greaterOrEquals. For upper Trilus, use lessOrEquals bool const greater = upper == 0 ? true : false; - std::vector greaterOrEqualResult; - GET_VALUE(greaterLessOrEqual(ctx, node, nodeIdx, rows, cols, greater), &greaterOrEqualResult); + std::vector greaterOrEqualResult = greaterLessOrEqual(ctx, node, nodeIdx, rows, cols, greater); auto* condition = &convertToTensor(greaterOrEqualResult.at(0), ctx); auto* result = N_CHECK(ctx->network()->addSelect(*condition, *data, *zero)); @@ -5942,8 +6115,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Unsqueeze) if (inputs.size() == 2) { ShapeTensor const axesInput{ctx, inputs.at(1)}; - ASSERT_NODE(axesInput.allValuesKnown(), "Axes input for unsqueeze operation should be a constant tensor.", - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(axesInput.allValuesKnown(), + "Axes input for unsqueeze operation should be a constant tensor.", node, nodeIdx, + ErrorCode::kUNSUPPORTED_NODE); for (auto& a : axesInput) { axes.push_back(a); @@ -5962,27 +6136,27 @@ DEFINE_BUILTIN_OP_IMPORTER(Unsqueeze) int32_t const newSize = data.getDimensions().nbDims + axes.size(); for (auto& axis : axes) { - CHECK_STATUS(convertAxis(axis, newSize, node, nodeIdx)); + convertAxis(axis, newSize, node, nodeIdx); } // "expanded : T // Reshaped tensor with same data as input." 
auto* expanded = unsqueezeTensor(ctx, node, data, axes, true); - ASSERT_NODE(expanded, "Failed to unsqueeze tensor!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(expanded, "Failed to unsqueeze tensor!", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); return {{expanded}}; } DEFINE_BUILTIN_OP_IMPORTER(Upsample) { - CHECK_STATUS(notInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(0), {"BOOL", "UINT8"}, node, nodeIdx); nvinfer1::ITensor& tensor = convertToTensor(inputs.at(0), ctx); int32_t const nbDims = tensor.getDimensions().nbDims; - ASSERT_NODE((nbDims > 0), "The input tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((nbDims > 0), "The input tensor cannot be a scalar.", node, nodeIdx, ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); nvinfer1::IResizeLayer* const layer = N_CHECK(ctx->network()->addResize(tensor)); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); auto mode = attrs.get("mode", "nearest"); // Set default resize mode. Nearest resize supports N-D (where 0 < N <= 8) resize. @@ -5993,7 +6167,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) if (ctx->getOpsetVersion() >= 9) { // Get scale factors from inputs[1] - ASSERT_NODE((inputs.size() == 2), + ONNXTRT_CHECK_NODE((inputs.size() == 2), "Operator Upsample requires exactly 2 inputs. Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); auto scales_input = inputs.at(1); if (scales_input.is_weights()) { // TRT-15340: Remove this and use else path when safety support nbDims == 1. ShapedWeights scales_weights = scales_input.weights(); - ASSERT_NODE((scales_weights.shape.nbDims == 1), + ONNXTRT_CHECK_NODE((scales_weights.shape.nbDims == 1), "The scales input must be 1D. Current rank of scales input = " << scales_weights.shape.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); // Scale factors have a batch dimension. - ASSERT_NODE((scales_weights.count() == static_cast(nbDims)), + ONNXTRT_CHECK_NODE((scales_weights.count() == static_cast(nbDims)), "The shape of the scales input must align with the dimensions of the input. Shape of scales input = " << scales_weights.count() << ", dimension of input = " << nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((scales_weights.type == ::ONNX_NAMESPACE::TensorProto::FLOAT), + ONNXTRT_CHECK_NODE((scales_weights.type == ::ONNX_NAMESPACE::TensorProto::FLOAT), "This version of TensorRT only supports FLOAT scales input. Current scales weight type = " << scales_weights.type << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -6021,7 +6195,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) } if (mode == "linear" || mode == "bilinear") { - ASSERT_NODE(canUseNDResize(scale_factors.size(), &scale_factors.front(), 3), + ONNXTRT_CHECK_NODE(canUseNDResize(scale_factors.size(), &scale_factors.front(), 3), "This version of TensorRT only supports linear resizing on the outermost 3 dimensions", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -6031,16 +6205,16 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) { nvinfer1::ITensor* resizeShape = resizeShapeTensor(ctx, tensor, scales_input); nvinfer1::Dims const outDims = resizeShape->getDimensions(); - ASSERT_NODE((outDims.nbDims == 1), + ONNXTRT_CHECK_NODE((outDims.nbDims == 1), "The scales input must be 1D. 
Current rank of the scales input = " << outDims.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); // Scale factors have a batch dimension. - ASSERT_NODE((outDims.d[0] == nbDims), + ONNXTRT_CHECK_NODE((outDims.d[0] == nbDims), "The shape of the scales input must align with the dimensions of the input. Current shape of the " "scales input = " << outDims.nbDims << ", dimension of the input = " << nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((resizeShape->getType() == DataType::kINT32), + ONNXTRT_CHECK_NODE((resizeShape->getType() == DataType::kINT32), "Resize output shape type must be integral. The actual type is " + getTrtDtypeName(resizeShape->getType()) + ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -6050,12 +6224,12 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) else { // TRT-15340: Adapt to use resizeShapeTensor instead when safety support nbDims == 1. - ASSERT_NODE( + ONNXTRT_CHECK_NODE( attrs.count("scales"), "Attribute scales is missing.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_ATTR); // Get scale factors from OnnxAttrs. auto scales = attrs.get>("scales"); // Scale factors have a batch dimension. - ASSERT_NODE((static_cast(scales.size()) == nbDims), + ONNXTRT_CHECK_NODE((static_cast(scales.size()) == nbDims), "The shape of the scales input must align with the dimensions of the input. Current shape of the scales " "input = " << scales.size() << ", dimension of the input = " << nbDims << ".", @@ -6067,7 +6241,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) } if (mode == "linear" || mode == "bilinear") { - ASSERT_NODE(canUseNDResize(scale_factors.size(), &scale_factors.front(), 3), + ONNXTRT_CHECK_NODE(canUseNDResize(scale_factors.size(), &scale_factors.front(), 3), "This version of TensorRT only supports linear resizing on the outermost 3 dimensions", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -6083,24 +6257,24 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) DEFINE_BUILTIN_OP_IMPORTER(Where) { - CHECK_STATUS(notInvalidType(inputs.at(1), {"UINT8"}, node, nodeIdx)); - CHECK_STATUS(notInvalidType(inputs.at(2), {"UINT8"}, node, nodeIdx)); + checkNotInvalidType(inputs.at(1), {"UINT8"}, node, nodeIdx); + checkNotInvalidType(inputs.at(2), {"UINT8"}, node, nodeIdx); nvinfer1::ITensor* condition = &convertToTensor(inputs.at(0), ctx); nvinfer1::ITensor* x = &convertToTensor(inputs.at(1), ctx); nvinfer1::ITensor* y = &convertToTensor(inputs.at(2), ctx); - CHECK_STATUS(broadcastTensors(ctx, x, y, condition)); + broadcastTensors(ctx, x, y, condition); nvinfer1::Dims cDims = condition->getDimensions(); nvinfer1::Dims xDims = x->getDimensions(); nvinfer1::Dims yDims = y->getDimensions(); - ASSERT_NODE((cDims.nbDims == xDims.nbDims), + ONNXTRT_CHECK_NODE((cDims.nbDims == xDims.nbDims), "The rank of the condition input tensor must be the same as that of the input x tensor. Rank of the condition input " "tensor = " << cDims.nbDims << ", rank of the input x tensor = " << xDims.nbDims << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((cDims.nbDims == yDims.nbDims), + ONNXTRT_CHECK_NODE((cDims.nbDims == yDims.nbDims), "The rank of the condition input tensor must be the same as that of the input y tensor. 
Rank of the condition input " "tensor = " << cDims.nbDims << ", rank of the input y tensor = " << yDims.nbDims << ".", @@ -6287,11 +6461,10 @@ nvinfer1::IPluginV3Layer* addPluginLayer(ImporterContext* ctx, std::vector -NodeImportResult addPluginWithCreator(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, - size_t const& nodeIdx, std::vector& inputs, OnnxAttrs const& attrs, - nvinfer1::IPluginCreatorInterface* creator) +NodeOutputs addPluginWithCreator(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const& nodeIdx, + std::vector& inputs, OnnxAttrs const& attrs, nvinfer1::IPluginCreatorInterface* creator) { - ASSERT_NODE(creator, "Invalid plugin creator.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(creator, "Invalid plugin creator.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); nvinfer1::PluginFieldCollection const* fieldNames = static_cast(creator)->getFieldNames(); @@ -6305,7 +6478,8 @@ NodeImportResult addPluginWithCreator(ImporterContext* ctx, ::ONNX_NAMESPACE::No { if (std::strcmp(creator->getInterfaceInfo().kind, "PLUGIN CREATOR_V1") != 0) { - ASSERT_NODE(attrs.type("tensorrt_plugin_shape_input_indices") == ::ONNX_NAMESPACE::AttributeProto::INTS, + ONNXTRT_CHECK_NODE( + attrs.type("tensorrt_plugin_shape_input_indices") == ::ONNX_NAMESPACE::AttributeProto::INTS, "Shape input indices defined, but attribute tensorrt_plugin_shape_input_indices has unsupported type. " "Only AttributeProto::INTS is supported.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -6314,8 +6488,8 @@ NodeImportResult addPluginWithCreator(ImporterContext* ctx, ::ONNX_NAMESPACE::No for (auto const& curr : shapeInputIdxsVec) { // check for out-of-range indices: index must be in [-n, n-1] where n = number of inputs. - ASSERT_NODE((curr < nbInputs) && (curr >= (-nbInputs)), "Out-of-range shape input index: " << curr, - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE((curr < nbInputs) && (curr >= (-nbInputs)), + "Out-of-range shape input index: " << curr, node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); auto const& e = (curr >= 0) ? 
curr : nbInputs + curr; auto res = shapeInputIdxsSet.insert(e); @@ -6338,7 +6512,7 @@ NodeImportResult addPluginWithCreator(ImporterContext* ctx, ::ONNX_NAMESPACE::No auto const plugin = createPlugin(getNodeName(node), static_cast(creator), fields); - ASSERT_NODE(plugin, "Could not create the plugin.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(plugin, "Could not create the plugin.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); std::vector pluginInputs{}; std::vector pluginShapeInputs{}; @@ -6369,7 +6543,7 @@ NodeImportResult addPluginWithCreator(ImporterContext* ctx, ::ONNX_NAMESPACE::No LOG_INFO("Successfully created plugin: " << pluginName); auto* layer = addPluginLayer(ctx, pluginInputs, pluginShapeInputs, *plugin); - ASSERT_NODE(layer, "Could not add the plugin layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Could not add the plugin layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); ctx->registerLayer(layer, node); RETURN_ALL_OUTPUTS(layer, node, nodeIdx); } @@ -6384,8 +6558,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Shrink) nvinfer1::ITensor* x = &convertToTensor(inputs.at(0), ctx); auto originalType = x->getType(); - ASSERT_NODE((originalType == DataType::kFLOAT || originalType == DataType::kHALF || originalType == DataType::kINT8 - || originalType == DataType::kINT32 || originalType == DataType::kINT64), + ONNXTRT_CHECK_NODE( + (originalType == DataType::kFLOAT || originalType == DataType::kHALF || originalType == DataType::kINT8 + || originalType == DataType::kINT32 || originalType == DataType::kINT64), "Only FLOAT, HALF, INT8, INT32, and INT64 are supported in Shrink. The current type = " + getTrtDtypeName(originalType) + ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); @@ -6399,39 +6574,41 @@ DEFINE_BUILTIN_OP_IMPORTER(Shrink) // prepare Constant Tensors nvinfer1::ITensor* lambdTensor = addConstant(ctx, std::vector{lambd}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}})->getOutput(0); - CHECK_STATUS(broadcastTensors(ctx, lambdTensor, x)); // align rank + broadcastTensors(ctx, lambdTensor, x); // align rank nvinfer1::ITensor* negLambdTensor = addConstant(ctx, std::vector{-lambd}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}})->getOutput(0); - CHECK_STATUS(broadcastTensors(ctx, negLambdTensor, x)); + broadcastTensors(ctx, negLambdTensor, x); nvinfer1::ITensor* biasTensor = addConstant(ctx, std::vector{bias}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}})->getOutput(0); - CHECK_STATUS(broadcastTensors(ctx, biasTensor, x)); + broadcastTensors(ctx, biasTensor, x); nvinfer1::ITensor* zeroTensor = addConstant(ctx, std::vector{0.}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}})->getOutput(0); - CHECK_STATUS(broadcastTensors(ctx, zeroTensor, x)); + broadcastTensors(ctx, zeroTensor, x); // If x > lambd, y = x - bias; Otherwise, y = 0 - std::vector xGreaterThanLambd; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {x, lambdTensor}, nvinfer1::ElementWiseOperation::kGREATER), &xGreaterThanLambd); + std::vector xGreaterThanLambd + = elementwiseHelper(ctx, node, nodeIdx, {x, lambdTensor}, nvinfer1::ElementWiseOperation::kGREATER); - std::vector xMinusBias; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {x, biasTensor}, nvinfer1::ElementWiseOperation::kSUB), &xMinusBias); + std::vector xMinusBias + = elementwiseHelper(ctx, node, nodeIdx, {x, biasTensor}, nvinfer1::ElementWiseOperation::kSUB); - auto firstSelectLayer = N_CHECK(ctx->network()->addSelect(convertToTensor(xGreaterThanLambd.at(0), ctx), convertToTensor(xMinusBias.at(0), 
ctx), *zeroTensor)); + auto firstSelectLayer = N_CHECK(ctx->network()->addSelect( + convertToTensor(xGreaterThanLambd.at(0), ctx), convertToTensor(xMinusBias.at(0), ctx), *zeroTensor)); nvinfer1::ITensor* output = N_CHECK(firstSelectLayer->getOutput(0)); // If x < -lambd, y = x + bias; - std::vector xLessThanMinusLambd; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {x, negLambdTensor}, nvinfer1::ElementWiseOperation::kLESS), &xLessThanMinusLambd); + std::vector xLessThanMinusLambd + = elementwiseHelper(ctx, node, nodeIdx, {x, negLambdTensor}, nvinfer1::ElementWiseOperation::kLESS); - std::vector xAddBias; - GET_VALUE(elementwiseHelper(ctx, node, nodeIdx, {x, biasTensor}, nvinfer1::ElementWiseOperation::kSUM), &xAddBias); + std::vector xAddBias + = elementwiseHelper(ctx, node, nodeIdx, {x, biasTensor}, nvinfer1::ElementWiseOperation::kSUM); - auto* layer = N_CHECK(ctx->network()->addSelect(convertToTensor(xLessThanMinusLambd.at(0), ctx), convertToTensor(xAddBias.at(0), ctx), *output)); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + auto* layer = N_CHECK(ctx->network()->addSelect( + convertToTensor(xLessThanMinusLambd.at(0), ctx), convertToTensor(xAddBias.at(0), ctx), *output)); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); ctx->registerLayer(layer, node); // cast back to originalType @@ -6441,7 +6618,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Shrink) DEFINE_BUILTIN_OP_IMPORTER(HardSwish) { nvinfer1::ITensor* x = &convertToTensor(inputs.at(0), ctx); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( (x->getType() == DataType::kFLOAT || x->getType() == DataType::kHALF || x->getType() == DataType::kINT8), "Only FLOAT, HALF or INT8 input is supported for the HardSwish operator in this version of TensorRT. " "The current type = " @@ -6451,8 +6628,8 @@ DEFINE_BUILTIN_OP_IMPORTER(HardSwish) // activationHelper does not support const and constexpr (compile failed) float kALPHA{1.F / 6}; float kBETA{0.5F}; - std::vector activationResult; - GET_VALUE(activationHelper(ctx, node, nodeIdx, inputs, nvinfer1::ActivationType::kHARD_SIGMOID, &kALPHA, &kBETA), &activationResult); + std::vector activationResult + = activationHelper(ctx, node, nodeIdx, inputs, nvinfer1::ActivationType::kHARD_SIGMOID, &kALPHA, &kBETA); return elementwiseHelper(ctx, node, nodeIdx, {x, activationResult.at(0)}, nvinfer1::ElementWiseOperation::kPROD); } @@ -6461,8 +6638,8 @@ DEFINE_BUILTIN_OP_IMPORTER(NonZero) { nvinfer1::ITensor* x = &convertToTensor(inputs.at(0), ctx); auto const t = x->getType(); - ASSERT_NODE((t == DataType::kFLOAT || t == DataType::kHALF || t == DataType::kBF16 || t == DataType::kINT32 - || t == DataType::kINT64 || t == DataType::kINT8 || t == DataType::kBOOL), + ONNXTRT_CHECK_NODE((t == DataType::kFLOAT || t == DataType::kHALF || t == DataType::kBF16 || t == DataType::kINT32 + || t == DataType::kINT64 || t == DataType::kINT8 || t == DataType::kBOOL), "Only FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, INT8 or BOOL input is supported for the NonZero operator in " "this version of TensorRT. 
The current type is " + getTrtDtypeName(t) + ".", @@ -6475,17 +6652,17 @@ DEFINE_BUILTIN_OP_IMPORTER(NonZero) DEFINE_BUILTIN_OP_IMPORTER(Mish) { nvinfer1::ITensor* x = &convertToTensor(inputs.at(0), ctx); - ASSERT_NODE((x->getType() == DataType::kFLOAT || x->getType() == DataType::kHALF), + ONNXTRT_CHECK_NODE((x->getType() == DataType::kFLOAT || x->getType() == DataType::kHALF), "Only FLOAT32 or FLOAT16 input is supported for the Mish operator in this version of TensorRT. " "The current type = " + getTrtDtypeName(x->getType()) + ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - std::vector softPlusOutput; - GET_VALUE(activationHelper(ctx, node, nodeIdx, inputs, nvinfer1::ActivationType::kSOFTPLUS), &softPlusOutput); + std::vector softPlusOutput + = activationHelper(ctx, node, nodeIdx, inputs, nvinfer1::ActivationType::kSOFTPLUS); - std::vector tanhOutput; - GET_VALUE(activationHelper(ctx, node, nodeIdx, softPlusOutput, nvinfer1::ActivationType::kTANH), &tanhOutput); + std::vector tanhOutput + = activationHelper(ctx, node, nodeIdx, softPlusOutput, nvinfer1::ActivationType::kTANH); return elementwiseHelper(ctx, node, nodeIdx, {x, tanhOutput.at(0)}, nvinfer1::ElementWiseOperation::kPROD); } @@ -6501,7 +6678,7 @@ DEFINE_BUILTIN_OP_IMPORTER(FallbackPluginImporter) LOG_INFO("Searching for plugin: " << pluginName << ", plugin_version: " << pluginVersion << ", plugin_namespace: " << pluginNamespace); nvinfer1::IPluginCreatorInterface* creator = importPluginCreator(ctx, pluginName, pluginVersion, pluginNamespace); - ASSERT_NODE(creator, "Plugin not found, are the plugin name, version, and namespace correct?", node, nodeIdx, + ONNXTRT_CHECK_NODE(creator, "Plugin not found, are the plugin name, version, and namespace correct?", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); if (std::strcmp(creator->getInterfaceInfo().kind, "PLUGIN CREATOR_V1") == 0) @@ -6514,7 +6691,7 @@ DEFINE_BUILTIN_OP_IMPORTER(FallbackPluginImporter) DEFINE_BUILTIN_OP_IMPORTER(LocalFunctionImporter) { auto function = ctx->localFunctions().at(node.op_type()); - ASSERT_NODE(node.input().size() == function.input().size(), + ONNXTRT_CHECK_NODE(node.input().size() == function.input().size(), "LocalFunction has an unexpected number of inputs! Number of node inputs = " << node.input().size() << ", number of function inputs = " << function.input().size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); @@ -6541,8 +6718,8 @@ DEFINE_BUILTIN_OP_IMPORTER(LocalFunctionImporter) ctx->tensors().insert({insideScopeName, ctx->tensors().at(outsideScopeName)}); localInputs.push_back(insideScopeName); } - ASSERT_NODE(ctx->tensors().count(insideScopeName), "Could not find mapping of local function input!", node, - nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(ctx->tensors().count(insideScopeName), "Could not find mapping of local function input!", + node, nodeIdx, ErrorCode::kINVALID_NODE); } // Create attribute map for the local function instance. 
Attributes can have default values (from the parent @@ -6584,13 +6761,16 @@ DEFINE_BUILTIN_OP_IMPORTER(LocalFunctionImporter) for (auto const& node : function.node()) { - Status status = onnx2trt::parseNode(ctx, node, nodeIdx); - if (!status.is_success()) + try + { + onnx2trt::parseNode(ctx, node, nodeIdx); + } + catch (OnnxTrtException& e) { if (ctx->localFunctions().count(node.op_type())) { ctx->localFunctionStack().pop_back(); - return status; + ONNXTRT_THROW(e.getStatus()); } else { @@ -6608,9 +6788,11 @@ DEFINE_BUILTIN_OP_IMPORTER(LocalFunctionImporter) localFunctionStackChar.push_back(ctx->localFunctionErrors().back()[i].c_str()); } ctx->localFunctionStack().pop_back(); - return Status(status.code(), std::string(status.desc()), std::string(status.file()), status.line(), - std::string(status.func()), status.node(), std::string(status.nodeName()), - std::string(status.nodeOperator()), localFunctionStackChar); + + Status status = e.getStatus(); + ONNXTRT_THROW(Status(status.code(), std::string(status.desc()), std::string(status.file()), + status.line(), std::string(status.func()), status.node(), std::string(status.nodeName()), + std::string(status.nodeOperator()), localFunctionStackChar)); } } } @@ -6619,7 +6801,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LocalFunctionImporter) std::vector outputs; for (auto const& output : function.output()) { - ASSERT_NODE( + ONNXTRT_CHECK_NODE( ctx->tensors().count(output), "Could not find output tensor!", node, nodeIdx, ErrorCode::kINVALID_NODE); outputs.push_back(TensorOrWeights(ctx->tensors().at(output))); } @@ -6638,12 +6820,12 @@ DEFINE_BUILTIN_OP_IMPORTER(LocalFunctionImporter) DEFINE_BUILTIN_OP_IMPORTER(TRT_Scale) { - ASSERT_NODE((inputs.size() >= 1), "Input is required.", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE( + ONNXTRT_CHECK_NODE((inputs.size() >= 1), "Input is required.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( (inputs.at(0).is_tensor()), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); if (inputs.size() >= 2) { - ASSERT_NODE((inputs.at(1).is_weights()), "The second input must be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(1).is_weights()), "The second input must be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); } auto& input = inputs.at(0).tensor(); @@ -6665,19 +6847,19 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Scale) if (attrs.get("scale")) { - ASSERT_NODE((inputs.at(counter).is_weights()), "The scale input must be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(counter).is_weights()), "The scale input must be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); scale = inputs.at(counter++).weights(); } if (attrs.get("shift")) { - ASSERT_NODE((inputs.at(counter).is_weights()), "The shift input must be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(counter).is_weights()), "The shift input must be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); shift = inputs.at(counter++).weights(); } if (attrs.get("power")) { - ASSERT_NODE((inputs.at(counter).is_weights()), "The power input must be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(counter).is_weights()), "The power input must be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); power = inputs.at(counter++).weights(); } @@ -6689,7 +6871,8 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Scale) DEFINE_BUILTIN_OP_IMPORTER(TRT_Shuffle) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first 
input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input = inputs.at(0).tensor(); OnnxAttrs attrs(node, ctx); @@ -6698,7 +6881,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Shuffle) bool zeroIsPlaceholder = attrs.get("zero_is_placeholder"); nvinfer1::IShuffleLayer* layer = N_CHECK(ctx->network()->addShuffle(input)); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); ctx->registerLayer(layer, node); layer->setFirstTranspose(perm1); layer->setSecondTranspose(perm2); @@ -6714,7 +6897,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Shuffle) } else { - ASSERT_NODE(inputs.at(1).is_tensor(), "The second input must be a tensor.", node, nodeIdx, + ONNXTRT_CHECK_NODE(inputs.at(1).is_tensor(), "The second input must be a tensor.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); layer->setInput(1, inputs.at(1).tensor()); } @@ -6724,11 +6907,12 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Shuffle) DEFINE_BUILTIN_OP_IMPORTER(TRT_TopK_Min) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input = inputs.at(0).tensor(); OnnxAttrs attrs(node, ctx); - ASSERT_NODE(inputs.at(1).is_weights(), "The second input must be an initializer.", node, nodeIdx, + ONNXTRT_CHECK_NODE(inputs.at(1).is_weights(), "The second input must be an initializer.", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); auto& kWeights = inputs.at(1).weights(); int k = *static_cast(kWeights.values); @@ -6743,8 +6927,9 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_TopK_Min) DEFINE_BUILTIN_OP_IMPORTER(TRT_MatMul) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( inputs.at(1).is_tensor(), "The second input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input0 = inputs.at(0).tensor(); auto& input1 = inputs.at(1).tensor(); @@ -6760,8 +6945,9 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MatMul) DEFINE_BUILTIN_OP_IMPORTER(TRT_RaggedSoftmax) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( inputs.at(1).is_tensor(), "The second input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input = inputs.at(0).tensor(); auto& bounds = inputs.at(1).tensor(); @@ -6773,7 +6959,8 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_RaggedSoftmax) DEFINE_BUILTIN_OP_IMPORTER(TRT_MaxAverageBlendPool) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input = inputs.at(0).tensor(); OnnxAttrs attrs(node, ctx); @@ -6784,7 +6971,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MaxAverageBlendPool) nvinfer1::Dims endPadding = 
makeDims(nbSpatialDims, 0); nvinfer1::PaddingMode paddingMode; bool excludePadding(true); - CHECK_STATUS(getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, excludePadding)); + getKernelParams(ctx, node, &kernelSize, &strides, &begPadding, &endPadding, paddingMode, excludePadding); float blend = attrs.get("blend"); nvinfer1::IPoolingLayer* layer @@ -6815,7 +7002,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2) std::string buffer = attrs.get("data"); nvinfer1::IPluginCreator* creator = registry.getPluginCreator(name.c_str(), version.c_str(), nspace.c_str()); - ASSERT_NODE(creator, "Plugin not found, are the plugin name, version, and namespace correct?", node, nodeIdx, + ONNXTRT_CHECK_NODE(creator, "Plugin not found, are the plugin name, version, and namespace correct?", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); auto const plugin = creator->deserializePlugin("", buffer.data(), buffer.size()); @@ -6823,7 +7010,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2) std::vector tensors; for (auto& input : inputs) { - ASSERT_NODE(input.is_tensor(), "The input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE(input.is_tensor(), "The input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); nvinfer1::ITensor* inputTensor = &input.tensor(); if (onlySupportInt32TRTPlugin(name) && inputTensor->getType() == DataType::kINT64) { @@ -6844,8 +7031,9 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2) DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE( + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( inputs.at(1).is_tensor(), "The second input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& data = inputs.at(0).tensor(); auto& indices = inputs.at(1).tensor(); @@ -6854,12 +7042,12 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather) int32_t nbElementWiseDims = attrs.get("nbElementWiseDims", 0); int32_t r = data.getDimensions().nbDims; - ASSERT_NODE((indices.getType() == DataType::kINT32), + ONNXTRT_CHECK_NODE((indices.getType() == DataType::kINT32), "This version of TensorRT only supports INT32 input indices. The current indices type = " + getTrtDtypeName(indices.getType()) + ".", node, nodeIdx, nvonnxparser::ErrorCode::kUNSUPPORTED_NODE); - ASSERT_NODE((r >= 1), "0D input data is not allowed.", node, nodeIdx, ErrorCode::kINVALID_NODE); - ASSERT_NODE((-r <= axis && axis < r), + ONNXTRT_CHECK_NODE((r >= 1), "0D input data is not allowed.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE((-r <= axis && axis < r), "The attribute axis should be in range [-r, r-1], where r is the rank of the input. 
Provided r = " << r << ", axis = " << axis << ".", node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE); @@ -6870,7 +7058,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather) } nvinfer1::IGatherLayer* layer = N_CHECK(ctx->network()->addGather(data, indices, axis)); - ASSERT_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK_NODE(layer, "Failed to create layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); ctx->registerLayer(layer, node); layer->setNbElementWiseDims(nbElementWiseDims); RETURN_FIRST_OUTPUT(layer, node, nodeIdx); @@ -6878,7 +7066,8 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather) DEFINE_BUILTIN_OP_IMPORTER(TRT_Slice) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input = inputs.at(0).tensor(); nvinfer1::ISliceLayer* layer; @@ -6894,7 +7083,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Slice) else { // start, size, stride are all inputs - ASSERT_NODE((inputs.size() == 4), + ONNXTRT_CHECK_NODE((inputs.size() == 4), "Exactly 4 inputs are required by TRT_Slice. Current input size = " << inputs.size() << ".", node, nodeIdx, ErrorCode::kINVALID_NODE); ShapeTensor const start{ctx, inputs.at(1)}; @@ -6908,7 +7097,8 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Slice) DEFINE_BUILTIN_OP_IMPORTER(TRT_Resize) { - ASSERT_NODE(inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + inputs.at(0).is_tensor(), "The first input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); auto& input = inputs.at(0).tensor(); nvinfer1::IResizeLayer* layer; @@ -6937,13 +7127,14 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Resize) { // TRT-15340: Adapt to use resizeShapeTensor instead when safety support nbDims == 1. auto scales = attrs.get>("scales"); - ASSERT_NODE((scales.size() > 0), "Attribute scales is missing.", node, nodeIdx, ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK_NODE( + (scales.size() > 0), "Attribute scales is missing.", node, nodeIdx, ErrorCode::kINVALID_NODE); layer->setScales(&scales[0], scales.size()); } } else { - ASSERT_NODE((inputs.at(1).is_tensor()), "The output dimension input must be a tensor.", node, nodeIdx, + ONNXTRT_CHECK_NODE((inputs.at(1).is_tensor()), "The output dimension input must be a tensor.", node, nodeIdx, ErrorCode::kINVALID_NODE); layer->setInput(1, inputs.at(1).tensor()); } diff --git a/onnxProtoUtils.hpp b/onnxProtoUtils.hpp index c570456..2ae908c 100644 --- a/onnxProtoUtils.hpp +++ b/onnxProtoUtils.hpp @@ -5,6 +5,7 @@ #pragma once #include "Status.hpp" +#include "errorHelpers.hpp" #include #include #include @@ -55,7 +56,7 @@ std::string convertProtoToString(ProtoMessage const& message) // Deserializes an ONNX ModelProto passed in as a protobuf::Message or a protobuf::MessageLite. 
template -Status deserializeOnnxModel(void const* serializedModel, size_t serializedModelSize, ProtoMessage* model) +void deserializeOnnxModel(void const* serializedModel, size_t serializedModelSize, ProtoMessage* model) { google::protobuf::io::ArrayInputStream rawInput(serializedModel, serializedModelSize); google::protobuf::io::CodedInputStream codedInput(&rawInput); @@ -66,9 +67,8 @@ Status deserializeOnnxModel(void const* serializedModel, size_t serializedModelS // Note: This WARs the very low default size limit (64MB) codedInput.SetTotalBytesLimit(std::numeric_limits::max(), std::numeric_limits::max() / 4); #endif - ASSERT((model->ParseFromCodedStream(&codedInput)) && "Failed to parse the ONNX model.", + ONNXTRT_CHECK(model->ParseFromCodedStream(&codedInput) && "Failed to parse the ONNX model.", ErrorCode::kMODEL_DESERIALIZE_FAILED); - return Status::success(); } // Helper function to dispatch to deserializeOnnxModel when user provides a path to the model. @@ -93,8 +93,8 @@ bool ParseFromFileAsBinary(ProtoMessage* msg, char const* filename) return false; } - auto result = deserializeOnnxModel(buffer.data(), buffer.size(), msg); - return !result.is_error(); + deserializeOnnxModel(buffer.data(), buffer.size(), msg); + return true; } // ostream overload for printing NodeProtos. diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py index afef8f2..c7dd456 100644 --- a/onnx_tensorrt/__init__.py +++ b/onnx_tensorrt/__init__.py @@ -4,4 +4,4 @@ from . import backend -__version__ = "10.4.0" +__version__ = "10.5.0"
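For reference, a minimal self-contained C++ sketch of the DFT weight table that the STFT importer in this patch builds; the frame length, the onesided setting, and the rectangular window are illustrative assumptions, not values taken from the patch:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    // Illustrative sizes: frameLength = 8 with onesided = 1 gives (8 >> 1) + 1 = 5 unique bins.
    int64_t const frameLength = 8;
    int64_t const dftUniqueBins = (frameLength >> 1) + 1;
    double const kPi = 3.141592653589793;

    // Rectangular window of ones, mirroring the importer's fallback when only frame_length is given.
    std::vector<float> window(frameLength, 1.F);

    // Weight tables of logical shape (dftUniqueBins, 1, 1, frameLength), stored row-major.
    std::vector<float> realWeights(dftUniqueBins * frameLength);
    std::vector<float> imagWeights(dftUniqueBins * frameLength);

    for (int64_t w = 0; w < dftUniqueBins; ++w)
    {
        for (int64_t k = 0; k < frameLength; ++k)
        {
            // Euler's formula: e^(-2*pi*j*w*k/n) = cos(angle) + j * sin(angle), with n = frameLength.
            double const angle = -2.0 * kPi * w * k / static_cast<double>(frameLength);
            realWeights[w * frameLength + k] = static_cast<float>(std::cos(angle)) * window[k];
            imagWeights[w * frameLength + k] = static_cast<float>(std::sin(angle)) * window[k];
        }
    }

    // Bin 0 is the DC bin: its real weights equal the window and its imaginary weights are zero.
    std::printf("realWeights[0][0] = %f, imagWeights[0][0] = %f\n", realWeights[0], imagWeights[0]);
    return 0;
}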