neuralmagic · bfineran · Apr 18, 2024 · Apr 19, 2024 · Apr 19, 2024 · Apr 19, 2024
diff --git a/src/compressed_tensors/quantization/observers/min_max.py b/src/compressed_tensors/quantization/observers/min_max.py
@@ -43,6 +43,37 @@ def calculate_qparams(self, observed: Tensor) -> Tuple[FloatTensor, IntTensor]:
         :param observed: observed tensor to calculate quantization parameters for
         :return: tuple of scale and zero point derived from the observed tensor
         """
+        # TODO: add support for the full range of quantization args; only 8-bit
+        #       per-tensor and channel-wise (group_size == -1) are handled so far
+
+        # channel wise quantization -- group_size == -1
+        if self.quantization_args.group_size == -1:
+            # reduce over dim 1 only; for a 2D tensor that is every dim except 0,
+            # and keepdim=True leaves the reduced dim in place with size 1
+            reduce_dims = [1]
+
+            min_vals = observed.amin(dim=reduce_dims, keepdim=True)
+            max_vals = observed.amax(dim=reduce_dims, keepdim=True)
+
+            # fold this batch into the running element-wise min/max kept on self
+            # (counter == 0 is treated as the first observation)
+            if self.counter > 0:
+                self.min_vals = torch.min(min_vals, self.min_vals)
+                self.max_vals = torch.max(max_vals, self.max_vals)
+            else:
+                self.min_vals = min_vals
+                self.max_vals = max_vals
+
+            self.counter += 1
+
+            # derive qparams from the running statistics rather than only the
+            # current batch, so earlier observations still contribute
+            return calculate_qparams(
+                self.min_vals, self.max_vals, self.quantization_args
+            )
+
+        # per-tensor quantization
+        # TODO: group size quantization
 
         min_val = torch.tensor([observed.min()])
         max_val = torch.tensor([observed.max()])
@@ -55,9 +86,10 @@ def calculate_qparams(self, observed: Tensor) -> Tuple[FloatTensor, IntTensor]:
             self.min_val = min_val
             self.max_val = max_val
 
+        self.counter += 1
+
         # ensure that the zeros are in the range
         min_val = torch.min(self.min_val, torch.zeros_like(self.min_val))
         max_val = torch.max(self.max_val, torch.zeros_like(self.max_val))
 
-        self.counter += 1
         return calculate_qparams(min_val, max_val, self.quantization_args)