neuralmagic · bfineran · Jun 19, 2024 · Jun 18, 2024 · Jun 18, 2024
diff --git a/src/compressed_tensors/quantization/lifecycle/forward.py b/src/compressed_tensors/quantization/lifecycle/forward.py
@@ -93,6 +93,14 @@ def dequantize(
     :param args: quantization args used to quantize x_q
     :return: dequantized float tensor
     """
+    # ensure all tensors are on the same device
+    # assumes that the target device is the input
+    # tensor's device
+    if x_q.device != scale.device:
+        scale = scale.to(x_q.device)
+    if x_q.device != zero_point.device:
+        zero_point = zero_point.to(x_q.device)
+
     if args is None:
         if scale.ndim == 0 or scale.ndim == 1:
             args = QuantizationArgs(strategy=QuantizationStrategy.TENSOR)