@@ -237,16 +237,24 @@ def apply_exact_had_to_linear(module, had_dim=-1, output=False, R2=None):
         assert is_pow2(had_dim), "Hadamard dimension must be a power of 2!"

     W = module.weight.data
+    if module.bias is not None:
+        B = module.bias.data
+        bias_dtype_orig = B.dtype
+        B = B.float()
     dtype_orig = W.dtype
     W = W.float()

     if had_dim == -1:
         if output:
             had_K, K = get_hadK(out_features)
             W = matmul_hadU(W.t(), had_K.to(W.device), K).t()
+            if module.bias is not None:
+                B = matmul_hadU(B, had_K.to(B.device), K)
         else:
             had_K, K = get_hadK(in_features)
             W = matmul_hadU(W, had_K.to(W.device), K)
+            if module.bias is not None:
+                B = matmul_hadU(B, had_K.to(B.device), K)
     else:
         if R2 is not None:
             hadK = R2.to(torch.float64)
@@ -260,8 +268,15 @@ def apply_exact_had_to_linear(module, had_dim=-1, output=False, R2=None):
         temp = W.reshape(-1, shape[-1] // had_dim, had_dim)
         temp = temp.to(torch.float64) @ hadK
         W = temp.reshape(shape)
+        if module.bias is not None:
+            shape = B.shape
+            temp = B.reshape(-1, had_dim)
+            temp = temp.to(torch.float64) @ hadK
+            B = temp.reshape(shape)

         if output:
             W = W.t()

     module.weight.data = W.to(dtype=dtype_orig)
+    if module.bias is not None:
+        module.bias.data = B.to(dtype=bias_dtype_orig)
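Why the bias has to follow the weight: a Linear layer computes y = W x + b, so when the layer's output is rotated by an orthogonal Hadamard matrix H, we need H(W x + b) = (H W) x + H b, i.e. the same H folded into the weight rows must also be applied to the bias. The snippet below is a minimal, self-contained sanity check of that identity. It uses scipy.linalg.hadamard as a stand-in rotation, not this repo's matmul_hadU/get_hadK helpers, so it illustrates the math rather than the exact code path above.

import torch
from scipy.linalg import hadamard

# Check that rotating the output of a linear layer is equivalent to rotating
# both its weight and its bias: H @ (W @ x + b) == (H @ W) @ x + (H @ b).
# H is scipy's +/-1 Hadamard matrix scaled to be orthogonal; this is an
# illustrative stand-in, not the repository's Hadamard kernels.
torch.manual_seed(0)
out_features, in_features = 8, 16
H = torch.tensor(hadamard(out_features), dtype=torch.float64) / out_features ** 0.5

W = torch.randn(out_features, in_features, dtype=torch.float64)
b = torch.randn(out_features, dtype=torch.float64)
x = torch.randn(in_features, dtype=torch.float64)

assert torch.allclose(H @ (W @ x + b), (H @ W) @ x + H @ b)

The diff also mirrors the existing dtype handling for the weight: the bias is promoted to float for the transform and cast back to its original dtype at the end.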