Commit b34c103

Remove unused attributes in Float8Tensor (#2935)
Summary: hp_value_lb and hp_value_ub for the weight are only used when calculating the scale for the float8 tensor, so they do not have to be stored in the tensor itself; this PR removes them. We also have BC testing to make sure the change does not break backward compatibility.

Test Plan: regression tests: python test/integration/test_load_and_run_checkpoint.py
1 parent aff141e commit b34c103
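To make the summary concrete: the bounds only participate in computing the scale, before the tensor is constructed. The following is a minimal sketch of that flow, not the actual torchao implementation; the function name compute_float8_scale and the scale convention (a multiplier mapping the clamped amax onto the float8 dtype's max representable value) are assumptions for illustration.

from typing import Optional

import torch


def compute_float8_scale(
    hp_tensor: torch.Tensor,
    float8_dtype: torch.dtype = torch.float8_e4m3fn,
    hp_value_lb: Optional[float] = None,
    hp_value_ub: Optional[float] = None,
) -> torch.Tensor:
    # Per-tensor absolute max, optionally clamped to [hp_value_lb, hp_value_ub].
    amax = hp_tensor.abs().max().to(torch.float32)
    if hp_value_lb is not None:
        amax = torch.clamp(amax, min=hp_value_lb)
    if hp_value_ub is not None:
        amax = torch.clamp(amax, max=hp_value_ub)
    # The scale maps the (clamped) observed range onto the float8 dtype's
    # largest representable value.
    return torch.finfo(float8_dtype).max / amax


hp_weight = torch.randn(128, 256)
scale = compute_float8_scale(hp_weight, hp_value_ub=1e4)
qdata = (hp_weight * scale).to(torch.float8_e4m3fn)
# From here on only qdata and scale matter (dequant is qdata.float() / scale);
# the bounds have served their purpose and need not be serialized with the tensor.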

File tree

1 file changed (+0, -24 lines)

torchao/quantization/quantize_/workflows/float8/float8_tensor.py

Lines changed: 0 additions & 24 deletions
@@ -85,8 +85,6 @@ class Float8Tensor(TorchAOBaseTensor):
            sharing the same set of quantization parameters (scale), have the same rank as qdata or
            is an empty list (representing per tensor quantization)
         mm_config (Float8MMConfig): Configuration for the matrix multiplication. Default uses fast accumulation.
-        hp_value_lb (Optional[float]): the lower bound for high precision floating point value for calculating scale
-        hp_value_ub (Optional[float]): the upper bound for high precision floating point value for calculating scale
         act_quant_kwargs (QuantizeTensorToFloat8Kwargs): the kwargs for Float8Tensor.from_hp
         kernel_preference (KernelPreference): the preference for quantize, mm etc. kernel to use,
            by default, this will be chosen for user based on hardware, library availabilities etc.
@@ -98,8 +96,6 @@ class Float8Tensor(TorchAOBaseTensor):
     optional_tensor_attribute_names = [
         "block_size",
         "mm_config",
-        "hp_value_lb",
-        "hp_value_ub",
         "act_quant_kwargs",
         "kernel_preference",
         "dtype",
@@ -111,8 +107,6 @@ def __new__(
         scale: torch.Tensor,
         block_size: Optional[List[int]] = None,
         mm_config: Optional[Float8MMConfig] = None,
-        hp_value_lb: Optional[float] = None,
-        hp_value_ub: Optional[float] = None,
         act_quant_kwargs: Optional[QuantizeTensorToFloat8Kwargs] = None,
         kernel_preference: KernelPreference = KernelPreference.AUTO,
         dtype: Optional[torch.dtype] = None,
@@ -130,8 +124,6 @@ def __init__(
         scale: torch.Tensor,
         block_size: Optional[List[int]] = None,
         mm_config: Optional[Float8MMConfig] = None,
-        hp_value_lb: Optional[float] = None,
-        hp_value_ub: Optional[float] = None,
         act_quant_kwargs: Optional[QuantizeTensorToFloat8Kwargs] = None,
         kernel_preference: KernelPreference = KernelPreference.AUTO,
         dtype: Optional[torch.dtype] = None,
@@ -141,8 +133,6 @@ def __init__(
         self.scale = scale
         self.block_size = block_size
         self.mm_config = mm_config
-        self.hp_value_lb = hp_value_lb
-        self.hp_value_ub = hp_value_ub
         self.act_quant_kwargs = act_quant_kwargs
         self.kernel_preference = kernel_preference
 
@@ -248,8 +238,6 @@ def from_hp(
             scale,
             block_size=block_size,
             mm_config=mm_config,
-            hp_value_lb=hp_value_lb,
-            hp_value_ub=hp_value_ub,
             act_quant_kwargs=act_quant_kwargs,
             kernel_preference=kernel_preference,
             dtype=hp_dtype,
@@ -472,8 +460,6 @@ def _(func, types, args, kwargs):
         sliced_scale,
         block_size,
         self.mm_config,
-        self.hp_value_lb,
-        self.hp_value_ub,
         self.act_quant_kwargs,
         self.kernel_preference,
         dtype=self.dtype,
@@ -503,8 +489,6 @@ def _(func, types, args, kwargs):
         assert tensor_0.scale.ndim == tensors[i].scale.ndim
         assert tensor_0.block_size == tensors[i].block_size
         assert tensor_0.mm_config == tensors[i].mm_config
-        assert tensor_0.hp_value_lb == tensors[i].hp_value_lb
-        assert tensor_0.hp_value_ub == tensors[i].hp_value_ub
         assert tensor_0.act_quant_kwargs == tensors[i].act_quant_kwargs
         assert tensor_0.kernel_preference == tensors[i].kernel_preference
 
@@ -528,8 +512,6 @@ def _(func, types, args, kwargs):
         cat_scale,
         block_size,
         tensor_0.mm_config,
-        tensor_0.hp_value_lb,
-        tensor_0.hp_value_ub,
         tensor_0.act_quant_kwargs,
         tensor_0.kernel_preference,
         tensor_0.dtype,
@@ -551,8 +533,6 @@ def _(func, types, args, kwargs):
         scale,
         block_size,
         self.mm_config,
-        self.hp_value_lb,
-        self.hp_value_ub,
         self.act_quant_kwargs,
         self.kernel_preference,
         self.dtype,
@@ -603,8 +583,6 @@ def _(func, types, args, kwargs):
         scale,
         block_size,
         self.mm_config,
-        self.hp_value_lb,
-        self.hp_value_ub,
         self.act_quant_kwargs,
         self.kernel_preference,
         self.dtype,
@@ -627,8 +605,6 @@ def _(func, types, args, kwargs):
         scale,
         block_size,
         self.mm_config,
-        self.hp_value_lb,
-        self.hp_value_ub,
         self.act_quant_kwargs,
         self.kernel_preference,
         self.dtype,
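The test plan's load-and-run checkpoint test is what guards a change like this: a checkpoint saved before the attribute removal must still load and run. Below is a hedged sketch of the shape such a regression check takes; the helper name and arguments are illustrative, not the actual code in test/integration/test_load_and_run_checkpoint.py.

import torch


def check_old_checkpoint_still_loads(model, old_ckpt_path, example_input, expected):
    # State dict written by the pre-change code, when Float8Tensor still
    # carried hp_value_lb / hp_value_ub.
    state_dict = torch.load(old_ckpt_path, weights_only=False)
    model.load_state_dict(state_dict, assign=True)
    # The removed attributes were only inputs to the scale computation, so the
    # stored qdata/scale -- and therefore the model's numerics -- are unchanged.
    with torch.no_grad():
        out = model(example_input)
    torch.testing.assert_close(out, expected)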
