Revert fuse conv fix err (#6859)
* merge fuse cuda conv revert

* resolve merge conflict revert exclude unsupported type

* add Stream for slicing

* remove file

* add Stream

Co-authored-by: RandySheriffH <randysheriff@hotmail.com>
oliviajain and RandySheriffH authored Mar 2, 2021
1 parent 29b30bb commit 40b0929
Showing 12 changed files with 299 additions and 720 deletions.
2 changes: 0 additions & 2 deletions onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
@@ -79,7 +79,6 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, uint8_t_MLFloat16, DequantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float_int8_t, QAttention);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, MLFloat16_int8_t, QAttention);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, FusedConv);
 
 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, FastGelu);
@@ -175,7 +174,6 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, BFloat16, FusedMatMul)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, BFloat16_float, LayerNormalization)>,
 #endif
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, FusedConv)>,
   };
 
   for (auto& function_table_entry : function_table) {
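For context on the two hunks above: CUDA contrib kernels are declared once and then registered through a function table, so removing an operator means deleting both its class declaration and its BuildKernelCreateInfo entry. A minimal sketch of that pattern, assuming the macros and helpers behave as they do elsewhere in this file (MyOp is a hypothetical operator, not one from the repo):

// Sketch only: forward-declare the typed kernel class for
// (provider, domain, opset version, element type, op name).
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MyOp);

Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
  static const BuildKernelCreateInfoFn function_table[] = {
      // One factory entry per declared kernel.
      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kMSDomain, 1, float, MyOp)>,
  };
  for (auto& function_table_entry : function_table) {
    // Each entry is a factory that yields the KernelCreateInfo to register.
    ORT_RETURN_IF_ERROR(kernel_registry.Register(function_table_entry()));
  }
  return Status::OK();
}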
126 changes: 0 additions & 126 deletions onnxruntime/contrib_ops/cuda/fused_conv.cc

This file was deleted.
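The deleted onnxruntime/contrib_ops/cuda/fused_conv.cc held the CUDA FusedConv kernel whose registration is removed above; this page does not show its contents. Purely as an orientation sketch of the usual shape of such a kernel, with everything below inferred from the registration macro rather than copied from the deleted file:

// Loose sketch, NOT the deleted file's code.
template <typename T>
class FusedConv final : public onnxruntime::cuda::CudaKernel {
 public:
  explicit FusedConv(const OpKernelInfo& info) : CudaKernel(info) {
    // A kernel like this would read its "activation" attribute here.
  }
  Status ComputeInternal(OpKernelContext* context) const override {
    // Conv plus bias/residual plus activation, executed as one fused call.
    return Status::OK();
  }
};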

6 changes: 0 additions & 6 deletions onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -1273,12 +1273,6 @@ activation.)DOC")
           "",
           "T",
           OpSchema::Optional)
-      .Input(
-          3,
-          "Z",
-          "",
-          "T",
-          OpSchema::Optional)
       .Output(
           0,
           "Y",
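For orientation: Z was the fourth, optional input of the com.microsoft FusedConv schema, and it carried the residual term for the CUDA-only Conv+Add+Relu fusion. In effect (my paraphrase of the schema change, not text from the repo), the fused computation narrows from

    Y = activation(Conv(X, W, B) + Z)

back to

    Y = activation(Conv(X, W, B))

with the activation carried as a node attribute.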
124 changes: 34 additions & 90 deletions onnxruntime/core/optimizer/conv_activation_fusion.cc
@@ -100,106 +100,50 @@ Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l
       continue;
     }
 
-    if (node->GetExecutionProviderType() == onnxruntime::kCudaExecutionProvider) {
-      if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() !=
-          ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-        continue;
-      }
-      if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Relu", {6, 13})) {
-        Node& conv_node = *node;
-        Node& act_node = *graph.GetNode(next_node.Index());
-        auto node_name = graph.GenerateNodeName(conv_node.Name() + "_" + act_node.Name());
-        Node& fused_conv = graph.AddNode(node_name,
-                                         "FusedConv",
-                                         node_name,
-                                         conv_node.MutableInputDefs(),
-                                         {},
-                                         &conv_node.GetAttributes(),
-                                         onnxruntime::kMSDomain);
-        fused_conv.SetExecutionProviderType(conv_node.GetExecutionProviderType());
-        fused_conv.AddAttribute("activation", "Relu");
-        graph_utils::FinalizeNodeFusion(graph, {conv_node, act_node}, fused_conv);
-        modified = true;
-      } else if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Add", {6, 7, 13})) {
-        const auto& last_node = *(next_node.OutputNodesBegin());
-        if (last_node.GetExecutionProviderType() != node->GetExecutionProviderType()) {
-          continue;
-        }
-        if (graph_utils::IsSupportedOptypeVersionAndDomain(last_node, "Relu", {6, 13}) &&
-            next_node.GetOutputEdgesCount() == 1) {
-          Node& conv_node = *node;
-          Node& add_node = *graph.GetNode(next_node.Index());
-          Node& act_node = *graph.GetNode(last_node.Index());
-          auto conv_inputs = conv_node.MutableInputDefs();
-          auto conv_outputs = conv_node.MutableOutputDefs();
-          auto add_inputs = add_node.MutableInputDefs();
-          for (auto add_input : add_inputs) {
-            if (add_input->Name() != conv_outputs[0]->Name()) {
-              conv_inputs.push_back(add_input);
-              break;
-            }
-          }
-          auto node_name = graph.GenerateNodeName(conv_node.Name() + "_" +
-                                                  add_node.Name() + "_" +
-                                                  act_node.Name());
-          Node& fused_conv = graph.AddNode(node_name,
-                                           "FusedConv",
-                                           node_name,
-                                           conv_inputs,
-                                           {}, &conv_node.GetAttributes(),
-                                           onnxruntime::kMSDomain);
-          fused_conv.SetExecutionProviderType(conv_node.GetExecutionProviderType());
-          fused_conv.AddAttribute("activation", "Relu");
-          graph_utils::FinalizeNodeFusion(graph, {conv_node, add_node, act_node}, fused_conv);
-          modified = true;
-        }
-      }
-    } else {
-      // Test if this is an activation that can be fused and also extract the
-      // activation's parameters.
-      std::vector<float> activation_params;
-      if (!graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Relu", {6, 13}) &&
-          !graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Sigmoid", {6, 13}) &&
-          !graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Tanh", {6, 13})) {
-        if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "LeakyRelu", {6})) {
-          activation_params.push_back(graph_utils::GetNodeAttribute(next_node, "alpha")->f());
-        } else if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6, 11, 12, 13})) {
-          float min, max;
-          if (GetClipConstantMinMax(graph, next_node, min, max)) {
-            activation_params.push_back(min);
-            activation_params.push_back(max);
-          } else {
-            continue;
-          }
+    // Test if this is an activation that can be fused and also extract the
+    // activation's parameters.
+    std::vector<float> activation_params;
+    if (!graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Relu", {6, 13}) &&
+        !graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Sigmoid", {6, 13}) &&
+        !graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Tanh", {6, 13})) {
+      if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "LeakyRelu", {6})) {
+        activation_params.push_back(graph_utils::GetNodeAttribute(next_node, "alpha")->f());
+      } else if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6, 11, 12, 13})) {
+        float min, max;
+        if (GetClipConstantMinMax(graph, next_node, min, max)) {
+          activation_params.push_back(min);
+          activation_params.push_back(max);
+        } else {
+          continue;
+        }
+      } else {
+        continue;
+      }
+    }
 
-      Node& conv_node = *node;
-      Node& act_node = *graph.GetNode(next_node.Index());
+    Node& conv_node = *node;
+    Node& act_node = *graph.GetNode(next_node.Index());
 
-      Node& fused_conv = graph.AddNode(graph.GenerateNodeName("fused " + conv_node.Name()), "FusedConv",
-                                       "fused Conv " + conv_node.Name() + "with activation " + act_node.OpType(),
-                                       conv_node.MutableInputDefs(),
-                                       {},
-                                       &conv_node.GetAttributes(),
-                                       "com.microsoft");
+    Node& fused_conv = graph.AddNode(graph.GenerateNodeName("fused " + conv_node.Name()), "FusedConv",
+                                     "fused Conv " + conv_node.Name() + "with activation " + act_node.OpType(),
+                                     conv_node.MutableInputDefs(),
+                                     {},
+                                     &conv_node.GetAttributes(),
+                                     "com.microsoft");
 
-      // Assign provider to this new node. Provider should be same as the provider for old node.
-      fused_conv.SetExecutionProviderType(conv_node.GetExecutionProviderType());
+    // Assign provider to this new node. Provider should be same as the provider for old node.
+    fused_conv.SetExecutionProviderType(conv_node.GetExecutionProviderType());
 
-      // Add attributes to specify the activation type and parameters.
-      fused_conv.AddAttribute("activation", next_node.OpType());
-      if (activation_params.size() > 0) {
-        fused_conv.AddAttribute("activation_params", activation_params);
-      }
+    // Add attributes to specify the activation type and parameters.
+    fused_conv.AddAttribute("activation", next_node.OpType());
+    if (activation_params.size() > 0) {
+      fused_conv.AddAttribute("activation_params", activation_params);
+    }
 
-      // move output definitions and edges from act_node to fused_conv. delete conv_node and act_node.
-      graph_utils::FinalizeNodeFusion(graph, {conv_node, act_node}, fused_conv);
+    // move output definitions and edges from act_node to fused_conv. delete conv_node and act_node.
+    graph_utils::FinalizeNodeFusion(graph, {conv_node, act_node}, fused_conv);
 
-      modified = true;
-    }
+    modified = true;
+  }
 
   return Status::OK();
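To summarize the hunk above: the deleted branch applied only to float Conv nodes assigned to the CUDA provider, collapsing Conv→Relu and Conv→Add→Relu chains into a single kMSDomain FusedConv; what survives is the provider-generic Conv+activation path, un-nested from the removed else. The key move in the deleted Conv+Add+Relu case was input rewiring, condensed here from the '-' lines above:

// The Add operand that is not the Conv's output is appended to the fused
// node's inputs, so the fused kernel computes Y = Relu(Conv(X, W, B) + Z).
auto conv_inputs = conv_node.MutableInputDefs();
auto conv_outputs = conv_node.MutableOutputDefs();
for (auto add_input : add_node.MutableInputDefs()) {
  if (add_input->Name() != conv_outputs[0]->Name()) {
    conv_inputs.push_back(add_input);  // becomes the optional Z input
    break;
  }
}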
4 changes: 2 additions & 2 deletions onnxruntime/core/optimizer/graph_transformer_utils.cc
@@ -142,9 +142,9 @@ std::vector<std::unique_ptr<GraphTransformer>> GenerateTransformers(TransformerL
       transformers.emplace_back(onnxruntime::make_unique<DynamicQuantizeMatMulFusion>(cpu_execution_providers));
 
       std::unordered_set<std::string> cpu_acl_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kAclExecutionProvider};
-      std::unordered_set<std::string> cpu_cuda_acl_armnn_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kCudaExecutionProvider, onnxruntime::kAclExecutionProvider, onnxruntime::kArmNNExecutionProvider};
+      std::unordered_set<std::string> cpu_acl_armnn_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kAclExecutionProvider, onnxruntime::kArmNNExecutionProvider};
 
-      transformers.emplace_back(onnxruntime::make_unique<ConvActivationFusion>(cpu_cuda_acl_armnn_execution_providers));
+      transformers.emplace_back(onnxruntime::make_unique<ConvActivationFusion>(cpu_acl_armnn_execution_providers));
 
       std::unordered_set<std::string> cpu_cuda_execution_providers = {onnxruntime::kCpuExecutionProvider, onnxruntime::kCudaExecutionProvider};
       transformers.emplace_back(onnxruntime::make_unique<GeluFusion>(cpu_cuda_execution_providers));
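Removing onnxruntime::kCudaExecutionProvider from this set is what actually switches the fusion off for CUDA-assigned nodes: the provider set handed to a GraphTransformer limits which nodes it may rewrite. A hedged sketch of that gating, paraphrasing how the check is typically applied rather than quoting onnxruntime internals:

// Sketch: a transformer skips nodes whose assigned EP is outside its set.
const std::unordered_set<std::string> compatible = {
    onnxruntime::kCpuExecutionProvider,
    onnxruntime::kAclExecutionProvider,
    onnxruntime::kArmNNExecutionProvider};  // post-revert set for ConvActivationFusion

bool MayRewrite(const onnxruntime::Node& node) {
  // After this commit, CUDA-assigned Conv nodes fail this check and are skipped.
  return compatible.find(node.GetExecutionProviderType()) != compatible.end();
}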