Commit 16d5875 (1 parent: cf6f1d4)

[Autocast] Fix edge case casting input directly to output

File tree: 2 files changed, +99 -1 lines

modelopt/onnx/autocast/precisionconverter.py

Lines changed: 34 additions & 1 deletion
@@ -586,9 +586,42 @@ def _bypass_cast_node(self, node: onnx.NodeProto) -> None:
                 consumer.input[i] = input_tensor
 
     def _remove_preexisting_casts(self) -> None:
-        nodes_to_remove = []
+        # First check for special case where an input is casted directly to an output
+        model_input_names = {input.name for input in self.model.graph.input}
+        model_output_names = {output.name for output in self.model.graph.output}
+        # Ensure that special casts that we add are not removed by the following logic
+        casts_to_skip = []
+        # Add casts as a separate step to avoid modifying the graph while iterating over it
+        casts_to_add = []
         for node in self.model.graph.node:
             if node.op_type == "Cast":
+                if node.input[0] in model_input_names and node.output[0] in model_output_names:
+                    # Create a special cast just for the input-output case.
+                    new_cast = helper.make_node(
+                        "Cast",
+                        name=node.name,
+                        inputs=[node.input[0]],
+                        outputs=[node.output[0]],
+                        to=node.attribute[0].i,
+                    )
+                    casts_to_skip.append(node.name)
+                    casts_to_add.append(new_cast)
+                    # Now adjust the old cast's name, consumers and producers
+                    node.name = f"{node.name}_io_special_case"
+                    node_new_output_name = f"{node.output[0]}_io_special_case"
+                    for consumer in utils.get_consumer_nodes(self.model, node.output[0]):
+                        for i, input_name in enumerate(consumer.input):
+                            if input_name == node.output[0]:
+                                consumer.input[i] = node_new_output_name
+                    node.output[0] = node_new_output_name
+
+        for cast in casts_to_add:
+            self.model.graph.node.append(cast)
+        casts_to_skip = set(casts_to_skip)
+
+        nodes_to_remove = []
+        for node in self.model.graph.node:
+            if node.op_type == "Cast" and node.name not in casts_to_skip:
                 cast_from_type = self._get_tensor_type(node.input[0])
                 cast_to_type = utils.get_cast_to_type(node)
                 is_fp_cast = cast_to_type in [
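
For context: the new branch detects a Cast whose input is a graph input and whose output is a graph output, appends a copy of that Cast under the original name (kept out of the removal loop via casts_to_skip) so the graph output still has a producer, and renames the original cast and its output tensor so any downstream consumers read the *_io_special_case tensor instead. Below is a minimal sketch of the triggering graph pattern, built with the same onnx.helper API the new test fixture uses; tensor and node names here are illustrative, not from the commit, and without the special handling, bypassing or removing such a cast would presumably leave the graph output with no producing node.

import onnx
from onnx import TensorProto, helper

# Graph input "X" is cast straight to graph output "Y" -- the edge case that
# _remove_preexisting_casts now detects via model_input_names / model_output_names.
x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3])
y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3])
cast = helper.make_node("Cast", ["X"], ["Y"], name="cast_io", to=TensorProto.FLOAT)
graph = helper.make_graph([cast], "io_cast_example", [x], [y])
model = helper.make_model(graph)
onnx.checker.check_model(model)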

tests/unit/onnx/autocast/test_precisionconverter.py

Lines changed: 65 additions & 0 deletions
@@ -1023,3 +1023,68 @@ def test_constant_cast_folding(model_with_constant_cast_patterns, low_precision_
     assert utils.get_consumer_nodes(converted_model, "const_scalar")[0].op_type == "Add"
     assert len(utils.get_consumer_nodes(converted_model, "const_array")) == 1
     assert utils.get_consumer_nodes(converted_model, "const_array")[0].op_type == "Add"
+
+
+@pytest.fixture
+def model_with_casted_output():
+    """Create a model with an output produced by a Cast node."""
+    # Create input and outputs
+    x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3])
+    y1 = helper.make_tensor_value_info("Y1", TensorProto.FLOAT, [2, 3])  # Intermediate output
+    y2 = helper.make_tensor_value_info("Y2", TensorProto.FLOAT, [2, 3])  # Final output
+
+    # Create constant value
+    const = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
+
+    # Create constant node
+    const_node = helper.make_node(
+        "Constant",
+        [],
+        ["const"],
+        name="const",
+        value=numpy_helper.from_array(const, name="const_value"),
+    )
+
+    # Create computation nodes
+    add1 = helper.make_node("Add", ["X", "const"], ["add1_out"], name="add1")
+    add2 = helper.make_node("Add", ["add1_out", "const"], ["Y2"], name="add2")
+
+    # Create cast node that feeds directly from input to output
+    cast_input = helper.make_node("Cast", ["X"], ["Y1"], name="cast_input", to=TensorProto.FLOAT)
+
+    graph = helper.make_graph(
+        [const_node, add1, add2, cast_input],
+        "model_with_casted_output",
+        [x],
+        [y1, y2],
+        [],
+    )
+
+    model = helper.make_model(graph, producer_name="model_with_casted_output")
+    model.opset_import[0].version = 20
+    model.ir_version = 10
+    onnx.checker.check_model(model)
+
+    model = onnx_utils.infer_shapes(model)
+    value_info_map, initializer_map, node_to_init_map = utils.setup_mappings(model)
+    onnx.save(model, "/tmp/model_with_casted_output.onnx")
+
+    return model, value_info_map, initializer_map, node_to_init_map
+
+
+@pytest.mark.parametrize("low_precision_type", ["fp16", "bf16"])
+def test_casted_output_model(model_with_casted_output, low_precision_type):
+    model, value_info_map, initializer_map, node_to_init_map = model_with_casted_output
+
+    converter = PrecisionConverter(
+        model,
+        value_info_map,
+        initializer_map,
+        node_to_init_map,
+        keep_io_types=True,
+        low_precision_type=low_precision_type,
+    )
+    converted_model = converter.convert(
+        high_precision_nodes=["cast_input"], low_precision_nodes=["add1", "add2"]
+    )
+    onnx.checker.check_model(converted_model)
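
A note on what the new test covers: cast_input takes graph input X directly to graph output Y1, is pinned to high_precision_nodes, and keep_io_types=True is set (which, as the name suggests, keeps the original float32 I/O dtypes), so the converter has to keep a valid producer for Y1 while converting add1/add2 to the low-precision type; onnx.checker.check_model(converted_model) is the pass/fail criterion.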
