From 02abc9b8c9e16fe1cd251babc9925aee2bf494e5 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Tue, 2 Jul 2024 20:11:05 +0000 Subject: [PATCH 01/15] actorder --- .../quantization/gptq/utils/gptq_wrapper.py | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index db2afc64a..460d4457a 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -109,6 +109,9 @@ def compress( self.H = torch.linalg.cholesky(self.H, upper=True) Hinv = self.H + actorder = False + invperm = False + # See section 3.4 of https://arxiv.org/abs/2203.07259 for i1 in range(0, self.columns, blocksize): i2 = min(i1 + blocksize, self.columns) @@ -140,14 +143,38 @@ def compress( q = torch.dequantize(q) elif hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme + actorder = quant_scheme.weights.actorder + if quant_scheme.weights is not None: - scale = self.layer.weight_scale - zero_point = self.layer.weight_zero_point from compressed_tensors.quantization import QuantizationStrategy from compressed_tensors.quantization.lifecycle.forward import ( fake_quantize, ) + scale = self.layer.weight_scale + zero_point = self.layer.weight_zero_point + + if actorder: + perm = torch.argsort(torch.diag(self.H), descending=True) + W = W[:, perm] + self.H = self.H[perm][:, perm] + invperm = torch.argsort(perm) + + group_size = quant_scheme.weights.group_size + if group_size is None or group_size == -1: + group_size = self.layer.weight.shape[1] + + if actorder: + indices = torch.arange(self.columns, device=invperm.device) + g_idx = (perm[indices] // group_size).to(dtype=torch.int32) + g_idx = g_idx[invperm] + self.layer.weight_g_idx.data = g_idx + else: + indices = torch.arange( + self.columns, device=W.device, dtype=torch.int32 + ) + g_idx = indices // group_size + strategy = quant_scheme.weights.strategy if strategy == QuantizationStrategy.TENSOR: @@ -176,6 +203,17 @@ def compress( # ends up being a channelwise application altered_qargs = copy(quant_scheme.weights) altered_qargs.strategy = QuantizationStrategy.CHANNEL + + # apply g_idx + if g_idx is not None: + # scale and zp already transformed by group_size + # extract first index of group_idze + indices_to_extract = torch.arange( + 0, g_idx.shape[0], group_size + ) + scale = scale[:, g_idx[indices_to_extract]] + zero_point = zero_point[:, g_idx[indices_to_extract]] + q = fake_quantize( q, scale[:, input_dim_group], @@ -206,6 +244,9 @@ def compress( logger.info("time %.2f" % (time.time() - tick)) logger.info("error %.2f" % torch.sum(Losses).item()) + if actorder: + W = W[:, invperm] + if isinstance(self.layer, transformers.Conv1D): W = W.t() W = W.reshape(final_shape).to(final_dtype) From 3e7b87539bb7672b848252d202fa6033814ec22d Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Wed, 10 Jul 2024 17:27:10 +0000 Subject: [PATCH 02/15] g_idx fix --- .../quantization/gptq/utils/gptq_wrapper.py | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index 460d4457a..447849946 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ 
b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -109,8 +109,23 @@ def compress( self.H = torch.linalg.cholesky(self.H, upper=True) Hinv = self.H - actorder = False - invperm = False + g_idx = None + if hasattr(self.layer, "quantization_scheme"): + quant_scheme = self.layer.quantization_scheme + actorder = quant_scheme.weights.actorder + group_size = quant_scheme.weights.group_size + + if actorder: + perm = torch.argsort(torch.diag(self.H), descending=True) + W = W[:, perm] + self.H = self.H[perm][:, perm] + invperm = torch.argsort(perm) + + g_idx = torch.Tensor([i // group_size for i in range(self.columns)]).to( + device=invperm.device + ) + g_idx = g_idx[invperm] + self.layer.weight_g_idx.data = g_idx # See section 3.4 of https://arxiv.org/abs/2203.07259 for i1 in range(0, self.columns, blocksize): @@ -143,7 +158,6 @@ def compress( q = torch.dequantize(q) elif hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme - actorder = quant_scheme.weights.actorder if quant_scheme.weights is not None: from compressed_tensors.quantization import QuantizationStrategy @@ -154,27 +168,10 @@ def compress( scale = self.layer.weight_scale zero_point = self.layer.weight_zero_point - if actorder: - perm = torch.argsort(torch.diag(self.H), descending=True) - W = W[:, perm] - self.H = self.H[perm][:, perm] - invperm = torch.argsort(perm) - group_size = quant_scheme.weights.group_size if group_size is None or group_size == -1: group_size = self.layer.weight.shape[1] - if actorder: - indices = torch.arange(self.columns, device=invperm.device) - g_idx = (perm[indices] // group_size).to(dtype=torch.int32) - g_idx = g_idx[invperm] - self.layer.weight_g_idx.data = g_idx - else: - indices = torch.arange( - self.columns, device=W.device, dtype=torch.int32 - ) - g_idx = indices // group_size - strategy = quant_scheme.weights.strategy if strategy == QuantizationStrategy.TENSOR: @@ -207,12 +204,13 @@ def compress( # apply g_idx if g_idx is not None: # scale and zp already transformed by group_size - # extract first index of group_idze + # extract first index of group_size indices_to_extract = torch.arange( 0, g_idx.shape[0], group_size ) - scale = scale[:, g_idx[indices_to_extract]] - zero_point = zero_point[:, g_idx[indices_to_extract]] + grouped_indicies = g_idx[indices_to_extract].int() + scale = scale[:, grouped_indicies] + zero_point = zero_point[:, grouped_indicies] q = fake_quantize( q, From 778b5b51eafce34770a7f52546979e93ccce73ae Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Wed, 10 Jul 2024 20:29:57 +0000 Subject: [PATCH 03/15] fix --- .../quantization/gptq/utils/gptq_wrapper.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index 447849946..65f13be8d 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -98,24 +98,14 @@ def compress( dead = torch.diag(self.H) == 0 self.H[dead, dead] = 1 W[:, dead] = 0 - - Losses = torch.zeros(self.rows, device=self.dev) - - damp = percdamp * torch.mean(torch.diag(self.H)) - diag = torch.arange(self.columns, device=self.dev) - self.H[diag, diag] += damp - self.H = torch.linalg.cholesky(self.H) - self.H = torch.cholesky_inverse(self.H) - self.H = torch.linalg.cholesky(self.H, upper=True) - Hinv = self.H - + g_idx = None if 
hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme actorder = quant_scheme.weights.actorder - group_size = quant_scheme.weights.group_size - + if actorder: + group_size = quant_scheme.weights.group_size perm = torch.argsort(torch.diag(self.H), descending=True) W = W[:, perm] self.H = self.H[perm][:, perm] @@ -126,6 +116,16 @@ def compress( ) g_idx = g_idx[invperm] self.layer.weight_g_idx.data = g_idx + + Losses = torch.zeros(self.rows, device=self.dev) + + damp = percdamp * torch.mean(torch.diag(self.H)) + diag = torch.arange(self.columns, device=self.dev) + self.H[diag, diag] += damp + self.H = torch.linalg.cholesky(self.H) + self.H = torch.cholesky_inverse(self.H) + self.H = torch.linalg.cholesky(self.H, upper=True) + Hinv = self.H # See section 3.4 of https://arxiv.org/abs/2203.07259 for i1 in range(0, self.columns, blocksize): @@ -209,6 +209,7 @@ def compress( 0, g_idx.shape[0], group_size ) grouped_indicies = g_idx[indices_to_extract].int() + scale = scale[:, grouped_indicies] zero_point = zero_point[:, grouped_indicies] From bc08e8d1d74d479bb6e545797e01b2db5890dfbd Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Wed, 10 Jul 2024 21:02:25 +0000 Subject: [PATCH 04/15] lint --- .../modifiers/quantization/gptq/utils/gptq_wrapper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index 65f13be8d..f7bea26d8 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -98,12 +98,12 @@ def compress( dead = torch.diag(self.H) == 0 self.H[dead, dead] = 1 W[:, dead] = 0 - + g_idx = None if hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme actorder = quant_scheme.weights.actorder - + if actorder: group_size = quant_scheme.weights.group_size perm = torch.argsort(torch.diag(self.H), descending=True) @@ -116,7 +116,7 @@ def compress( ) g_idx = g_idx[invperm] self.layer.weight_g_idx.data = g_idx - + Losses = torch.zeros(self.rows, device=self.dev) damp = percdamp * torch.mean(torch.diag(self.H)) @@ -124,7 +124,7 @@ def compress( self.H[diag, diag] += damp self.H = torch.linalg.cholesky(self.H) self.H = torch.cholesky_inverse(self.H) - self.H = torch.linalg.cholesky(self.H, upper=True) + self.H = torch.linalg.cholesky(self.H, upper=True) Hinv = self.H # See section 3.4 of https://arxiv.org/abs/2203.07259 From f203537e4dd7d1d5a507a1489b4c3ea06cf416fc Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Thu, 11 Jul 2024 20:33:20 +0000 Subject: [PATCH 05/15] propagagte g_idx with perm --- .../modifiers/quantization/gptq/utils/gptq_wrapper.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index f7bea26d8..c47969893 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -111,10 +111,9 @@ def compress( self.H = self.H[perm][:, perm] invperm = torch.argsort(perm) - g_idx = torch.Tensor([i // group_size for i in range(self.columns)]).to( - device=invperm.device - ) - g_idx = g_idx[invperm] + g_idx = torch.Tensor( + [perm[i] // group_size for i in range(self.columns)] + ).to(device=invperm.device) 
self.layer.weight_g_idx.data = g_idx Losses = torch.zeros(self.rows, device=self.dev) From 9967a4a6256beb2236398cbde727f252a3ef4f8a Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Fri, 12 Jul 2024 18:20:21 +0000 Subject: [PATCH 06/15] scratch --- .../quantization/gptq/utils/gptq_wrapper.py | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index c47969893..ebd4f2c97 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -100,6 +100,24 @@ def compress( W[:, dead] = 0 g_idx = None + # if hasattr(self.layer, "quantization_scheme"): + # quant_scheme = self.layer.quantization_scheme + # actorder = quant_scheme.weights.actorder + + # if actorder: + # group_size = quant_scheme.weights.group_size + # perm = torch.argsort(torch.diag(self.H), descending=True) + # # W = W[:, perm] + # # self.H = self.H[perm][:, perm] + # invperm = torch.argsort(perm) + + # # g_idx = torch.Tensor( + # # [perm[i] // group_size for i in range(self.columns)] + # # ).to(device=invperm.device) + # g_idx = torch.Tensor( + # [i // group_size for i in range(self.columns)] + # ).to(device=invperm.device) + # self.layer.weight_g_idx.data = g_idx if hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme actorder = quant_scheme.weights.actorder @@ -114,8 +132,11 @@ def compress( g_idx = torch.Tensor( [perm[i] // group_size for i in range(self.columns)] ).to(device=invperm.device) + # g_idx = torch.Tensor( + # [i // group_size for i in range(self.columns)] + # ).to(device=invperm.device) self.layer.weight_g_idx.data = g_idx - + Losses = torch.zeros(self.rows, device=self.dev) damp = percdamp * torch.mean(torch.diag(self.H)) @@ -200,24 +221,22 @@ def compress( altered_qargs = copy(quant_scheme.weights) altered_qargs.strategy = QuantizationStrategy.CHANNEL - # apply g_idx if g_idx is not None: - # scale and zp already transformed by group_size - # extract first index of group_size - indices_to_extract = torch.arange( - 0, g_idx.shape[0], group_size + q = fake_quantize( + q, + scale[:, int(g_idx[column_idx])], + zero_point[:, int(g_idx[column_idx])], + altered_qargs, + ) + + else: + + q = fake_quantize( + q, + scale[:, input_dim_group], + zero_point[:, input_dim_group], + altered_qargs, ) - grouped_indicies = g_idx[indices_to_extract].int() - - scale = scale[:, grouped_indicies] - zero_point = zero_point[:, grouped_indicies] - - q = fake_quantize( - q, - scale[:, input_dim_group], - zero_point[:, input_dim_group], - altered_qargs, - ) Q1[:, i] = q Losses1[:, i] = (w - q) ** 2 / d**2 @@ -244,6 +263,7 @@ def compress( if actorder: W = W[:, invperm] + self.H = self.H[perm][:, perm] if isinstance(self.layer, transformers.Conv1D): W = W.t() From 52ef6be284b59482e38718d518eacb62ad6926e6 Mon Sep 17 00:00:00 2001 From: Benjamin Date: Thu, 18 Jul 2024 12:07:01 -0400 Subject: [PATCH 07/15] GPTQ - move calibration of quantiztion params to after hessian calibration --- .../modifiers/quantization/gptq/base.py | 20 +++++++++++++++++-- .../quantization/gptq/utils/gptq_wrapper.py | 8 ++++++++ .../quantization/quantization/base.py | 7 +++++-- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py index 2ccfb114a..d9d7959db 
100644 --- a/src/llmcompressor/modifiers/quantization/gptq/base.py +++ b/src/llmcompressor/modifiers/quantization/gptq/base.py @@ -1,7 +1,12 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import torch -from compressed_tensors.quantization import QuantizationScheme +from compressed_tensors.quantization import ( + QuantizationScheme, + disable_quantization, + enable_quantization, + freeze_module_quantization, +) from loguru import logger from pydantic import Field from torch.nn import Module @@ -163,7 +168,9 @@ def on_initialize(self, state: "State", **kwargs) -> bool: if not self.initialized_structure_: self.on_initialize_structure(state, **kwargs) if self.quantization_modifier_: - self.quantization_modifier_.initialize(state, **kwargs) + self.quantization_modifier_.initialize( + state, freeze_quantization=False, **kwargs + ) if not self.quantize: raise ValueError("To use the GPTQModifier, quantization must be enabled.") @@ -178,6 +185,7 @@ def on_initialize(self, state: "State", **kwargs) -> bool: self.initialize_compression(modifiable_model, calibration_dataloader) self.apply_compression(calibration_dataloader) + state.model.apply(freeze_module_quantization) return True @@ -250,6 +258,11 @@ def apply_compression( logger.info( f"Running {class_name} calibration with " f"{len(dataloader)} samples..." ) + + # quantization scales and zp are already initialized but we do not + # want to calibrate wrt to these + self.model.apply(disable_quantization) + if not self.sequential_update: # in non-sequential mode we run one forward batch for all modules run_calibration_forward(self.model, dataloader, mask_padding=True) @@ -271,6 +284,9 @@ def apply_compression( layer_compressor.revert_layer_wrappers() torch.cuda.empty_cache() + # re-enable quantization + self.model.apply(enable_quantization) + def _build_quant_modifier(self): """ Build a quantization modifier based on the specified config_groups, diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index db2afc64a..5bc3f14f3 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -141,6 +141,14 @@ def compress( elif hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme if quant_scheme.weights is not None: + # fetch latest correct scale and ZP relevant for any changes + # such as activation reordering + from compressed_tensors.quantization import ( + update_layer_weight_quant_params, + ) + + update_layer_weight_quant_params(self.layer) + scale = self.layer.weight_scale zero_point = self.layer.weight_zero_point from compressed_tensors.quantization import QuantizationStrategy diff --git a/src/llmcompressor/modifiers/quantization/quantization/base.py b/src/llmcompressor/modifiers/quantization/quantization/base.py index 434f6f2d8..b90ec250f 100644 --- a/src/llmcompressor/modifiers/quantization/quantization/base.py +++ b/src/llmcompressor/modifiers/quantization/quantization/base.py @@ -61,7 +61,9 @@ def on_initialize_structure(self, state: State, **kwargs): self._apply_modifier_to_model(module) module.apply(freeze_module_quantization) - def on_initialize(self, state: State, **kwargs) -> bool: + def on_initialize( + self, state: State, freeze_quantization: bool = True, **kwargs + ) -> bool: if self.end and self.end != -1: raise ValueError( "end_epoch is disabled for QuantizationModifier and can only 
be set to" @@ -80,7 +82,8 @@ def on_initialize(self, state: State, **kwargs) -> bool: self._check_token_distribution( module, threshold=kwargs.get("min_tokens_per_module") ) - module.apply(freeze_module_quantization) + if freeze_quantization: + module.apply(freeze_module_quantization) return True From ad7803ea2633adf2865f1a7ab9f1bc28656f2d4d Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Mon, 22 Jul 2024 15:33:31 +0000 Subject: [PATCH 08/15] no recompute --- .../quantization/gptq/utils/gptq_wrapper.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index 66c59adbc..cc0bec980 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -136,7 +136,7 @@ def compress( # [i // group_size for i in range(self.columns)] # ).to(device=invperm.device) self.layer.weight_g_idx.data = g_idx - + Losses = torch.zeros(self.rows, device=self.dev) damp = percdamp * torch.mean(torch.diag(self.H)) @@ -161,6 +161,16 @@ def compress( if preserve_zeros: W1_nz_mask = W_nz_mask[:, i1:i2] + if hasattr(self.layer, "quantization_scheme"): + quant_scheme = self.layer.quantization_scheme + if quant_scheme.weights is not None: + # such as activation reordering + from compressed_tensors.quantization import ( + update_layer_weight_quant_params, + ) + + update_layer_weight_quant_params(self.layer, g_idx) + for i in range(count): w = W1[:, i] d = Hinv1[i, i] @@ -181,11 +191,6 @@ def compress( if quant_scheme.weights is not None: # fetch latest correct scale and ZP relevant for any changes - # such as activation reordering - from compressed_tensors.quantization import ( - update_layer_weight_quant_params, - ) - update_layer_weight_quant_params(self.layer, g_idx) scale = self.layer.weight_scale zero_point = self.layer.weight_zero_point @@ -237,9 +242,8 @@ def compress( zero_point[:, int(g_idx[column_idx])], altered_qargs, ) - - else: + else: q = fake_quantize( q, scale[:, input_dim_group], @@ -266,7 +270,7 @@ def compress( W[:, i2:] -= w_err * W_nz_mask[:, i2:] else: W[:, i2:] -= w_err - + print("time %.2f" % (time.time() - tick)) logger.info("time %.2f" % (time.time() - tick)) logger.info("error %.2f" % torch.sum(Losses).item()) From 84e74cec3d59c7ff7d309fc4324dd77a5d26ab3d Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Mon, 22 Jul 2024 16:27:48 +0000 Subject: [PATCH 09/15] clean up --- .../quantization/gptq/utils/gptq_wrapper.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index cc0bec980..b147cc87a 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -161,15 +161,9 @@ def compress( if preserve_zeros: W1_nz_mask = W_nz_mask[:, i1:i2] + is_layer_updated_actorder = False if hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme - if quant_scheme.weights is not None: - # such as activation reordering - from compressed_tensors.quantization import ( - update_layer_weight_quant_params, - ) - - update_layer_weight_quant_params(self.layer, g_idx) for i in range(count): w = W1[:, i] @@ -191,6 +185,17 @@ def compress( if quant_scheme.weights 
is not None: # fetch latest correct scale and ZP relevant for any changes + if ( + quant_scheme.weights is not None + and not is_layer_updated_actorder + ): + # such as activation reordering + from compressed_tensors.quantization import ( + update_layer_weight_quant_params, + ) + + update_layer_weight_quant_params(self.layer, g_idx) + is_layer_updated_actorder = True scale = self.layer.weight_scale zero_point = self.layer.weight_zero_point From fed380f014cea8ee99ac3cea78d76869bc2cb629 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Mon, 22 Jul 2024 16:33:47 +0000 Subject: [PATCH 10/15] remvoe unwanted code --- .../quantization/gptq/utils/gptq_wrapper.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index 4e54d0f67..75b9936d5 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -100,24 +100,6 @@ def compress( W[:, dead] = 0 g_idx = None - # if hasattr(self.layer, "quantization_scheme"): - # quant_scheme = self.layer.quantization_scheme - # actorder = quant_scheme.weights.actorder - - # if actorder: - # group_size = quant_scheme.weights.group_size - # perm = torch.argsort(torch.diag(self.H), descending=True) - # # W = W[:, perm] - # # self.H = self.H[perm][:, perm] - # invperm = torch.argsort(perm) - - # # g_idx = torch.Tensor( - # # [perm[i] // group_size for i in range(self.columns)] - # # ).to(device=invperm.device) - # g_idx = torch.Tensor( - # [i // group_size for i in range(self.columns)] - # ).to(device=invperm.device) - # self.layer.weight_g_idx.data = g_idx if hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme actorder = quant_scheme.weights.actorder @@ -132,9 +114,6 @@ def compress( g_idx = torch.Tensor( [perm[i] // group_size for i in range(self.columns)] ).to(device=invperm.device) - # g_idx = torch.Tensor( - # [i // group_size for i in range(self.columns)] - # ).to(device=invperm.device) self.layer.weight_g_idx.data = g_idx Losses = torch.zeros(self.rows, device=self.dev) @@ -273,7 +252,6 @@ def compress( W[:, i2:] -= w_err * W_nz_mask[:, i2:] else: W[:, i2:] -= w_err - print("time %.2f" % (time.time() - tick)) logger.info("time %.2f" % (time.time() - tick)) logger.info("error %.2f" % torch.sum(Losses).item()) From 51adb464bb61bf34a1937162bee343fc0f0a1461 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Fri, 26 Jul 2024 16:18:26 +0000 Subject: [PATCH 11/15] rese observer params before inference --- .../modifiers/quantization/gptq/utils/gptq_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index 75b9936d5..ad47151a7 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -171,6 +171,8 @@ def compress( update_layer_weight_quant_params, ) + observer = getattr(self.layer, "weight_observer", None) + observer.reset() update_layer_weight_quant_params(self.layer, g_idx) is_layer_updated_actorder = True From 543cfdd5f382cf83ee5d977370e13c39f21c44c4 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Sat, 27 Jul 2024 00:37:39 +0000 Subject: [PATCH 12/15] draft --- .../quantization/gptq/utils/gptq_wrapper.py | 70 +++++++++++++++---- 1 
file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index ad47151a7..cf4d1eed0 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -99,7 +99,8 @@ def compress( self.H[dead, dead] = 1 W[:, dead] = 0 - g_idx = None + g_idx, g_idx_for_perm_weights = None, None + actorder = False if hasattr(self.layer, "quantization_scheme"): quant_scheme = self.layer.quantization_scheme actorder = quant_scheme.weights.actorder @@ -111,9 +112,27 @@ def compress( self.H = self.H[perm][:, perm] invperm = torch.argsort(perm) + # # g_idx for the shuffled weights + # g_idx = torch.Tensor( + # [i // group_size for i in range(self.columns)] + # ).to(device=invperm.device) + + # g_idx for the original weights g_idx = torch.Tensor( [perm[i] // group_size for i in range(self.columns)] ).to(device=invperm.device) + + # g_idx for the permutated weights + g_idx_for_perm_weights = torch.Tensor( + [i // group_size for i in range(self.columns)] + ).to(device=invperm.device) + + + # # g_idx for the original weights + # g_idx_original = g_idx.clone() + # for i in range(g_idx.shape[0]): + # g_idx_original[perm[i]] = torch.Tensor([i // group_size]) + self.layer.weight_g_idx.data = g_idx Losses = torch.zeros(self.rows, device=self.dev) @@ -173,6 +192,8 @@ def compress( observer = getattr(self.layer, "weight_observer", None) observer.reset() + + # update self.layer params with respect to g_idx update_layer_weight_quant_params(self.layer, g_idx) is_layer_updated_actorder = True @@ -183,9 +204,6 @@ def compress( fake_quantize, ) - scale = self.layer.weight_scale - zero_point = self.layer.weight_zero_point - group_size = quant_scheme.weights.group_size if group_size is None or group_size == -1: group_size = self.layer.weight.shape[1] @@ -210,24 +228,25 @@ def compress( else: # strategy == QuantizationStrategy.GROUP # get the group index for the current column column_idx = i1 + i - input_dim_group = ( - column_idx // quant_scheme.weights.group_size - ) - + # Since we're only applying quantization to a slice, this # ends up being a channelwise application altered_qargs = copy(quant_scheme.weights) altered_qargs.strategy = QuantizationStrategy.CHANNEL - if g_idx is not None: + if actorder: q = fake_quantize( q, - scale[:, int(g_idx[column_idx])], - zero_point[:, int(g_idx[column_idx])], + scale[:, int(g_idx_for_perm_weights[column_idx])], + zero_point[:, int(g_idx_for_perm_weights[column_idx])], altered_qargs, ) else: + input_dim_group = ( + column_idx // quant_scheme.weights.group_size + ) + q = fake_quantize( q, scale[:, input_dim_group], @@ -259,7 +278,7 @@ def compress( if actorder: W = W[:, invperm] - self.H = self.H[perm][:, perm] + # self.H = self.H[perm][:, perm] if isinstance(self.layer, transformers.Conv1D): W = W.t() @@ -276,3 +295,30 @@ def free(self): """ delattr(self, "H") super().free() + + +""" +# w = torch.randn(5,5) +t = torch.Tensor([0,3,4,2,1]) +group_size = 2 + +perm = torch.argsort(t) # tensor([0, 4, 3, 1, 2]) # put zero to 0, put 4th index to the second idx, here and so on + +g_idx = torch.Tensor([perm[i] // group_size for i in range(5)]).to(device=invperm.device) +# tensor([0., 2., 1., 0., 1.], device='cuda:0') + +g_idx[perm[i]] = i // group_idx for i in column_size + + + +Make g_idx + +g_idx = torch.Tensor( + [perm[i] // group_size for i in range(self.columns)] + 
).to(device=invperm.device) + +in this format, make sure it runs in compressed tensors and llm-compressor +""" + + + From 757adfefef5e1d8097a9d045ba70531cb6a77c87 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Wed, 31 Jul 2024 20:36:20 +0000 Subject: [PATCH 13/15] draft --- .../quantization/gptq/utils/gptq_wrapper.py | 79 ++++++++++++++----- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index cf4d1eed0..bffd82133 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -108,33 +108,42 @@ def compress( if actorder: group_size = quant_scheme.weights.group_size perm = torch.argsort(torch.diag(self.H), descending=True) + + # index of perm is the min of self.H + + # # to mimick group size + # perm = torch.arange(0, self.columns, dtype=torch.int) + + W = W[:, perm] self.H = self.H[perm][:, perm] invperm = torch.argsort(perm) - # # g_idx for the shuffled weights - # g_idx = torch.Tensor( - # [i // group_size for i in range(self.columns)] - # ).to(device=invperm.device) - + # breakpoint() # g_idx for the original weights - g_idx = torch.Tensor( - [perm[i] // group_size for i in range(self.columns)] + g_idx = torch.tensor( + # [perm[i] // group_size for i in range(self.columns)] + [i // group_size for i in range(self.columns)], #B + dtype=torch.int, ).to(device=invperm.device) - - # g_idx for the permutated weights + + # g_idx for the permutated weights # A g_idx_for_perm_weights = torch.Tensor( [i // group_size for i in range(self.columns)] ).to(device=invperm.device) - - # # g_idx for the original weights - # g_idx_original = g_idx.clone() + # # # g_idx for the original weights + # g_idx = g_idx_for_perm_weights.clone() # for i in range(g_idx.shape[0]): - # g_idx_original[perm[i]] = torch.Tensor([i // group_size]) + # g_idx[perm[i]] = torch.Tensor([i // group_size]) - self.layer.weight_g_idx.data = g_idx + g_idx = g_idx[invperm] # A, B + # print(g_idx) + # breakpoint() + # breakpoint() + self.layer.weight_g_idx.data = g_idx + Losses = torch.zeros(self.rows, device=self.dev) damp = percdamp * torch.mean(torch.diag(self.H)) @@ -194,11 +203,19 @@ def compress( observer.reset() # update self.layer params with respect to g_idx - update_layer_weight_quant_params(self.layer, g_idx) + # update_layer_weight_quant_params(self.layer, g_idx) + update_layer_weight_quant_params(self.layer,perm=perm, + # g_idx=g_idx, + ) + is_layer_updated_actorder = True scale = self.layer.weight_scale zero_point = self.layer.weight_zero_point + # breakpoint() + scale = scale[:, g_idx] + zero_point=zero_point[:, g_idx] + from compressed_tensors.quantization import QuantizationStrategy from compressed_tensors.quantization.lifecycle.forward import ( fake_quantize, @@ -235,10 +252,22 @@ def compress( altered_qargs.strategy = QuantizationStrategy.CHANNEL if actorder: + input_dim_group = ( + column_idx // quant_scheme.weights.group_size + ) + # print(column_idx) q = fake_quantize( q, scale[:, int(g_idx_for_perm_weights[column_idx])], zero_point[:, int(g_idx_for_perm_weights[column_idx])], + # scale[:, int(g_idx[column_idx])], + # zero_point[:, int(g_idx[column_idx])], + # scale[:, int(g_idx[column_idx])], + # zero_point[:, int(g_idx[column_idx])], + # scale[:, input_dim_group], + # zero_point[:, input_dim_group], + # scale[:, int(invperm[column_idx] // group_size)], #B + # 
zero_point[:, int(invperm[column_idx] // group_size)], #B altered_qargs, ) @@ -298,16 +327,24 @@ def free(self): """ -# w = torch.randn(5,5) -t = torch.Tensor([0,3,4,2,1]) +import torch + group_size = 2 +n = 6 +# Hessian +# H = torch.randperm(n) +H = torch.Tensor([5, 0, 1, 3, 2, 4]) # tensor([5, 0, 1, 3, 2, 4]) +perm = torch.argsort(H, descending=True) # tensor([0, 5, 3, 4, 2, 1]) +invperm = torch.argsort(perm) # tensor([0, 5, 4, 2, 3, 1]) + +# w = torch.randperm(n) +w = torch.Tensor([0, 4, 1, 3, 5, 2]) # tensor([0, 4, 1, 3, 5, 2]) +W = w.clone()[perm] # tensor([0, 2, 3, 5, 1, 4]) -perm = torch.argsort(t) # tensor([0, 4, 3, 1, 2]) # put zero to 0, put 4th index to the second idx, here and so on +g_idx = torch.tensor([int(i // group_size) for i in range(n)], dtype=torch.int) +g_idx = g_idx[invperm] -g_idx = torch.Tensor([perm[i] // group_size for i in range(5)]).to(device=invperm.device) -# tensor([0., 2., 1., 0., 1.], device='cuda:0') -g_idx[perm[i]] = i // group_idx for i in column_size From 814a97e84caba6bdc12eaea11ece0d99f73b6100 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Thu, 1 Aug 2024 00:58:38 +0000 Subject: [PATCH 14/15] draft --- .../quantization/gptq/utils/gptq_wrapper.py | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index bffd82133..a3c55824e 100644 --- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -122,8 +122,8 @@ def compress( # breakpoint() # g_idx for the original weights g_idx = torch.tensor( - # [perm[i] // group_size for i in range(self.columns)] - [i // group_size for i in range(self.columns)], #B + [perm[i] // group_size for i in range(self.columns)] , + # [i // group_size for i in range(self.columns)], #B dtype=torch.int, ).to(device=invperm.device) @@ -137,7 +137,7 @@ def compress( # for i in range(g_idx.shape[0]): # g_idx[perm[i]] = torch.Tensor([i // group_size]) - g_idx = g_idx[invperm] # A, B + # g_idx = g_idx[invperm] # A, B # print(g_idx) # breakpoint() # breakpoint() @@ -204,8 +204,9 @@ def compress( # update self.layer params with respect to g_idx # update_layer_weight_quant_params(self.layer, g_idx) - update_layer_weight_quant_params(self.layer,perm=perm, - # g_idx=g_idx, + update_layer_weight_quant_params(self.layer, + # perm=perm, + g_idx=g_idx, ) is_layer_updated_actorder = True @@ -262,6 +263,7 @@ def compress( zero_point[:, int(g_idx_for_perm_weights[column_idx])], # scale[:, int(g_idx[column_idx])], # zero_point[:, int(g_idx[column_idx])], + # scale[:, int(g_idx[column_idx])], # zero_point[:, int(g_idx[column_idx])], # scale[:, input_dim_group], @@ -345,16 +347,6 @@ def free(self): g_idx = g_idx[invperm] - - - -Make g_idx - -g_idx = torch.Tensor( - [perm[i] // group_size for i in range(self.columns)] - ).to(device=invperm.device) - -in this format, make sure it runs in compressed tensors and llm-compressor """ From c83f977c41392b66d2dd8391f481957d86b49793 Mon Sep 17 00:00:00 2001 From: George Ohashi Date: Tue, 6 Aug 2024 18:34:28 +0000 Subject: [PATCH 15/15] mimic gptq --- .../quantization/gptq/utils/gptq_wrapper.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py index a3c55824e..bc86e2cea 100644 
--- a/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py +++ b/src/llmcompressor/modifiers/quantization/gptq/utils/gptq_wrapper.py @@ -122,8 +122,8 @@ def compress( # breakpoint() # g_idx for the original weights g_idx = torch.tensor( - [perm[i] // group_size for i in range(self.columns)] , - # [i // group_size for i in range(self.columns)], #B + # [perm[i] // group_size for i in range(self.columns)] , + [i // group_size for i in range(self.columns)], #B dtype=torch.int, ).to(device=invperm.device) @@ -142,8 +142,11 @@ def compress( # breakpoint() # breakpoint() + # self.layer.weight_g_idx.data = g_idx + g_idx = g_idx[invperm] self.layer.weight_g_idx.data = g_idx + Losses = torch.zeros(self.rows, device=self.dev) damp = percdamp * torch.mean(torch.diag(self.H)) @@ -205,8 +208,8 @@ def compress( # update self.layer params with respect to g_idx # update_layer_weight_quant_params(self.layer, g_idx) update_layer_weight_quant_params(self.layer, - # perm=perm, - g_idx=g_idx, + perm=perm, + # g_idx=g_idx, ) is_layer_updated_actorder = True @@ -214,8 +217,8 @@ def compress( scale = self.layer.weight_scale zero_point = self.layer.weight_zero_point # breakpoint() - scale = scale[:, g_idx] - zero_point=zero_point[:, g_idx] + # scale = scale[:, g_idx] + # zero_point=zero_point[:, g_idx] from compressed_tensors.quantization import QuantizationStrategy from compressed_tensors.quantization.lifecycle.forward import ( @@ -261,8 +264,10 @@ def compress( q, scale[:, int(g_idx_for_perm_weights[column_idx])], zero_point[:, int(g_idx_for_perm_weights[column_idx])], - # scale[:, int(g_idx[column_idx])], - # zero_point[:, int(g_idx[column_idx])], + # scale[:, int(perm[column_idx] // group_size)], + # zero_point[:, int(perm[column_idx] // group_size)], + # scale[:, int(perm[column_idx] // group_size)], + # zero_point[:, int(perm[column_idx] // group_size)], # scale[:, int(g_idx[column_idx])], # zero_point[:, int(g_idx[column_idx])], @@ -344,7 +349,7 @@ def free(self): W = w.clone()[perm] # tensor([0, 2, 3, 5, 1, 4]) g_idx = torch.tensor([int(i // group_size) for i in range(n)], dtype=torch.int) -g_idx = g_idx[invperm] +g_idx = g_idx[invperm] # tensor([0, 2, 2, 1, 1, 0] """
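
For reference, a small standalone sketch of the g_idx bookkeeping the series converges on: columns are visited in descending order of the Hessian diagonal, groups are contiguous in that permuted order, and the group index is mapped back to the original column order before being stored on the layer. Names (columns, group_size, perm, invperm, g_idx) mirror gptq_wrapper.py; the weight and Hessian stand-ins are illustrative only and this snippet is not part of any patch above.

import torch

torch.manual_seed(0)
columns, group_size = 8, 4
W = torch.randn(3, columns)      # stand-in weight matrix (rows x columns)
H_diag = torch.rand(columns)     # stand-in for torch.diag(self.H)

# Activation ordering: visit the most salient columns (largest Hessian
# diagonal entries) first.
perm = torch.argsort(H_diag, descending=True)
invperm = torch.argsort(perm)

W_perm = W[:, perm]              # columns are quantized in this order

# In the permuted order, groups are contiguous blocks of group_size columns.
base_g_idx = torch.arange(columns, dtype=torch.int32) // group_size

# Store g_idx in the *original* column order: original column j sits at
# permuted position invperm[j], so its group is invperm[j] // group_size.
g_idx = base_g_idx[invperm]
assert torch.equal(g_idx, (invperm // group_size).to(torch.int32))

# After quantizing W_perm, undo the permutation before writing weights back.
assert torch.equal(W_perm[:, invperm], W)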