@@ -268,13 +268,15 @@ def _(func, types, args, kwargs):
 
 @implements(aten.addmm_.default)
 def _(func, types, args, kwargs):
-    output_tensor, input_tensor, weight_tensor = (
+    bias_tensor, input_tensor, weight_tensor = (
         args[0],
         args[1],
-        args[2] if len(args) > 2 else None,
+        args[2],
     )
+    assert kwargs.get("alpha", 1) == 1, "only alpha=1 is supported"
+    assert kwargs.get("beta", 1) == 1, "only beta=1 is supported"
     out = _float8_mm_impl(input_tensor, weight_tensor)
-    return output_tensor.copy_(out)
+    return bias_tensor.add_(out)
 
 
 def _float8_mm_impl(
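For context on the rewritten handler above: aten.addmm_ computes self = beta * self + alpha * (mat1 @ mat2) in place, so with the asserted alpha == beta == 1 the op reduces to adding the matmul result into the bias tensor. A minimal standalone sketch (not part of this commit) of the equivalence that the new bias_tensor.add_(out) line relies on:

import torch

# addmm_(self, mat1, mat2) with the default beta=1, alpha=1 is
# self = self + mat1 @ mat2, performed in place on self.
bias = torch.zeros(2, 3)
mat1 = torch.randn(2, 4)
mat2 = torch.randn(4, 3)

reference = bias.clone().addmm_(mat1, mat2)   # in-place addmm with beta=alpha=1
equivalent = bias.clone().add_(mat1 @ mat2)   # what the Float8 handler emulates
assert torch.allclose(reference, equivalent)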
@@ -708,51 +710,6 @@ def _(func, types, args, kwargs):
     return return_and_correct_aliasing(func, args, kwargs, new)
 
 
-@implements(torch.ops.aten.to.dtype_layout)
-def _(func, types, args, kwargs):
-    # only support kwargs for now
-    assert len(args) == 1
-    self = args[0]
-    # only support dtype, layout, and device for now
-    for k in kwargs.keys():
-        assert k in ["dtype", "layout", "device"]
-    # only support same dtype and layout
-    # different dtype and layout has undefined behavior
-    if "dtype" in kwargs:
-        assert kwargs["dtype"] == self.dtype
-    if "layout" in kwargs:
-        assert kwargs["layout"] == self.layout
-    # if device is the same, treat this like a no-op
-    device = kwargs.get("device")
-    if device == self.device:
-        return self
-    # otherwise, move all inner tensors to the new device
-    new_tensor = self.__class__(
-        func(self.qdata, device=device),
-        func(self.scale, device=device),
-        self.block_size,
-        self.mm_config,
-        self.act_quant_kwargs,
-        self.kernel_preference,
-        self.dtype,
-    )
-    return return_and_correct_aliasing(func, args, kwargs, new_tensor)
-
-
-# This is called during _apply() to see if we can shallow
-# copy the content of one tensor into another. For now,
-# we only allow shallow copy if both tensors are `Float8Tensor`
-# and have the same shape.
-@implements_torch_function(torch._has_compatible_shallow_copy_type)
-def _(func, types, args, kwargs):
-    assert len(args) == 2
-    return (
-        isinstance(args[0], Float8Tensor)
-        and isinstance(args[1], Float8Tensor)
-        and args[0].shape == args[1].shape
-    )
-
-
 @implements(aten.t.default)
 def _(func, types, args, kwargs):
     assert len(args) == 1
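Both hunks touch handlers registered through the @implements decorator. As a rough, assumed sketch (not torchao's actual implementation), the registry pattern these handlers plug into looks like this: each decorated function is stored in a table keyed by the aten op, and the tensor subclass's __torch_dispatch__ looks the op up and forwards (func, types, args, kwargs) to it.

import torch

_DISPATCH_TABLE = {}

def implements(op):
    # Hypothetical registry decorator; torchao's real helper may differ.
    def decorator(fn):
        _DISPATCH_TABLE[op] = fn
        return fn
    return decorator

class MyQuantTensor(torch.Tensor):
    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        if func in _DISPATCH_TABLE:
            return _DISPATCH_TABLE[func](func, types, args, kwargs)
        raise NotImplementedError(f"{func} is not handled by {cls.__name__}")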