feat: Support multi-output models in OnnxRuntimeBase (#2171)
As mentioned in #2170, multi-output models are increasingly used, for instance when training models with multiple auxiliary objectives. However, our ONNX plugin does not support them at the moment.

This PR adds a `runONNXInferenceMultiOutput` method that supports this use case. The regular `runONNXInference` methods can still be used on multi-output models; by convention, the output of the first node is returned. This roughly matches the previous behavior, which was subtly buggy: the output of the first node was returned, but with the dimensions of the last node assumed. This is now fixed.
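For illustration, a minimal usage sketch of the two entry points (the model file name and feature values below are placeholders, and the sketch assumes `NetworkBatchInput` is the Eigen-based batch type already used by the plugin):

```cpp
#include "Acts/Plugins/Onnx/OnnxRuntimeBase.hpp"

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "onnx-example");
  // Hypothetical model file with more than one output node
  Acts::OnnxRuntimeBase model(env, "multi_output_model.onnx");

  // One row per batch entry, one column per input feature
  Acts::NetworkBatchInput batch(2, 3);
  batch << 0.1f, 0.2f, 0.3f,
           0.4f, 0.5f, 0.6f;

  // New entry point: one [batch row][feature] matrix per output node
  auto allOutputs = model.runONNXInferenceMultiOutput(batch);

  // Existing entry point: still works on multi-output models and
  // returns the predictions of the first output node only
  auto firstOutput = model.runONNXInference(batch);

  return 0;
}
```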

The change *should* be backwards compatible; hopefully the CI agrees.

closes #2170
gagnonlg authored Jun 12, 2023
1 parent c71ecbb commit c8e8d1d
Showing 2 changed files with 48 additions and 22 deletions.
10 changes: 9 additions & 1 deletion Plugins/Onnx/include/Acts/Plugins/Onnx/OnnxRuntimeBase.hpp
@@ -49,6 +49,14 @@ class OnnxRuntimeBase {
std::vector<std::vector<float>> runONNXInference(
NetworkBatchInput& inputTensorValues) const;

/// @brief Run the multi-output ONNX inference function for a batch of input
///
/// @param inputTensorValues Vector of the input feature values of all the inputs used for prediction
///
/// @return The vector of output (predicted) values, one for each output
std::vector<std::vector<std::vector<float>>> runONNXInferenceMultiOutput(
NetworkBatchInput& inputTensorValues) const;

private:
/// ONNX runtime session / model properties
std::unique_ptr<Ort::Session> m_session;
@@ -57,7 +65,7 @@ class OnnxRuntimeBase {
std::vector<int64_t> m_inputNodeDims;
std::vector<Ort::AllocatedStringPtr> m_outputNodeNamesAllocated;
std::vector<const char*> m_outputNodeNames;
std::vector<int64_t> m_outputNodeDims;
std::vector<std::vector<int64_t>> m_outputNodeDims;
};

} // namespace Acts
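The nested return value of `runONNXInferenceMultiOutput` is indexed as [output node][batch row][feature], and the single-output overload simply keeps the first node. Continuing the sketch above (the loop body is illustrative only):

```cpp
// allOutputs[node][row] is the feature vector predicted by output node
// `node` for batch entry `row`; by the convention of this PR,
// model.runONNXInference(batch) is equivalent to allOutputs.front().
for (const auto& nodeOutput : allOutputs) {   // one entry per output node
  for (const auto& row : nodeOutput) {        // one entry per batch row
    for (float prediction : row) {
      (void)prediction;  // consume the predicted value here
    }
  }
}
```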
60 changes: 39 additions & 21 deletions Plugins/Onnx/src/OnnxRuntimeBase.cpp
@@ -31,8 +31,8 @@ Acts::OnnxRuntimeBase::OnnxRuntimeBase(Ort::Env& env, const char* modelPath) {
m_session->GetInputNameAllocated(i, allocator));
m_inputNodeNames.push_back(m_inputNodeNamesAllocated.back().get());

// Get the dimensions of the input nodes,
// here we assume that all input nodes have the same dimensions
// Get the dimensions of the input nodes
// Assumes single input
Ort::TypeInfo inputTypeInfo = m_session->GetInputTypeInfo(i);
auto tensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
m_inputNodeDims = tensorInfo.GetShape();
@@ -47,10 +47,9 @@ Acts::OnnxRuntimeBase::OnnxRuntimeBase(Ort::Env& env, const char* modelPath) {
m_outputNodeNames.push_back(m_outputNodeNamesAllocated.back().get());

// Get the dimensions of the output nodes
// here we assume that all output nodes have the dimensions
Ort::TypeInfo outputTypeInfo = m_session->GetOutputTypeInfo(i);
auto tensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
m_outputNodeDims = tensorInfo.GetShape();
m_outputNodeDims.push_back(tensorInfo.GetShape());
}
}

@@ -69,28 +68,38 @@ std::vector<float> Acts::OnnxRuntimeBase::runONNXInference(
// the function assumes that the model has 1 input node and 1 output node
std::vector<std::vector<float>> Acts::OnnxRuntimeBase::runONNXInference(
Acts::NetworkBatchInput& inputTensorValues) const {
return runONNXInferenceMultiOutput(inputTensorValues).front();
}

// Inference function for single-input, multi-output models
std::vector<std::vector<std::vector<float>>>
Acts::OnnxRuntimeBase::runONNXInferenceMultiOutput(
NetworkBatchInput& inputTensorValues) const {
int batchSize = inputTensorValues.rows();
std::vector<int64_t> inputNodeDims = m_inputNodeDims;
std::vector<int64_t> outputNodeDims = m_outputNodeDims;
std::vector<std::vector<int64_t>> outputNodeDims = m_outputNodeDims;

// The first dim node should correspond to the batch size
// If it is -1, it is dynamic and should be set to the input size
if (inputNodeDims[0] == -1) {
inputNodeDims[0] = batchSize;
}
if (outputNodeDims[0] == -1) {
outputNodeDims[0] = batchSize;

bool outputDimsMatch = true;
for (std::vector<int64_t>& nodeDim : outputNodeDims) {
if (nodeDim[0] == -1) {
nodeDim[0] = batchSize;
}
outputDimsMatch &= batchSize == 1 || nodeDim[0] == batchSize;
}

if (batchSize != 1 &&
(inputNodeDims[0] != batchSize || outputNodeDims[0] != batchSize)) {
if (batchSize != 1 && (inputNodeDims[0] != batchSize || !outputDimsMatch)) {
throw std::runtime_error(
"runONNXInference: batch size doesn't match the input or output node "
"size");
}

// Create input tensor object from data values
// note: this assumes the model has only 1 input node
Ort::MemoryInfo memoryInfo =
Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(
@@ -107,24 +116,33 @@ std::vector<std::vector<float>> Acts::OnnxRuntimeBase::runONNXInference(
m_session->Run(run_options, m_inputNodeNames.data(), &inputTensor,
m_inputNodeNames.size(), m_outputNodeNames.data(),
m_outputNodeNames.size());

// Double-check that outputTensors contains Tensors and that the count matches
// that of output nodes
if (!outputTensors[0].IsTensor() ||
(outputTensors.size() != m_outputNodeNames.size())) {
throw std::runtime_error(
"runONNXInference: calculation of output failed. ");
}
// Get pointer to output tensor float values
// note: this assumes the model has only 1 output node
float* outputTensor = outputTensors.front().GetTensorMutableData<float>();
// Get the output values
std::vector<std::vector<float>> outputTensorValues(
batchSize, std::vector<float>(outputNodeDims[1], -1));
for (int i = 0; i < outputNodeDims[0]; i++) {
for (int j = 0; j < ((outputNodeDims.size() > 1) ? outputNodeDims[1] : 1);
j++) {
outputTensorValues[i][j] = outputTensor[i * outputNodeDims[1] + j];

std::vector<std::vector<std::vector<float>>> multiOutput;

for (size_t i_out = 0; i_out < outputTensors.size(); i_out++) {
// Get pointer to output tensor float values
float* outputTensor = outputTensors.at(i_out).GetTensorMutableData<float>();
// Get the output values
std::vector<std::vector<float>> outputTensorValues(
batchSize, std::vector<float>(outputNodeDims.at(i_out)[1], -1));
for (int i = 0; i < outputNodeDims.at(i_out)[0]; i++) {
for (int j = 0; j < ((outputNodeDims.at(i_out).size() > 1)
? outputNodeDims.at(i_out)[1]
: 1);
j++) {
outputTensorValues[i][j] =
outputTensor[i * outputNodeDims.at(i_out)[1] + j];
}
}
multiOutput.push_back(std::move(outputTensorValues));
}
return outputTensorValues;
return multiOutput;
}
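For context on the indexing in the loop above: each `Ort::Value` is read through a pointer to a flat, row-major float buffer, so element (i, j) of an output whose rows have `width` entries sits at offset `i * width + j`. A small self-contained sketch of that unpacking, independent of the ACTS types (the function name is illustrative):

```cpp
#include <cstddef>
#include <vector>

// Unpack a flat row-major buffer of shape [rows x width] into nested vectors,
// mirroring the indexing used in runONNXInferenceMultiOutput.
std::vector<std::vector<float>> unpackRowMajor(const float* flat,
                                               std::size_t rows,
                                               std::size_t width) {
  std::vector<std::vector<float>> out(rows, std::vector<float>(width));
  for (std::size_t i = 0; i < rows; ++i) {
    for (std::size_t j = 0; j < width; ++j) {
      out[i][j] = flat[i * width + j];
    }
  }
  return out;
}
```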
