From dd164997c385e77d65fe98e9eec48a4c801f9b3e Mon Sep 17 00:00:00 2001
From: Rahul Tuli <rahul@neuralmagic.com>
Date: Thu, 26 Sep 2024 13:45:29 +0000
Subject: [PATCH] Update folder structure and move tests

Remove unused import
---
 .../compressors/__init__.py                   |  18 +--
 src/compressed_tensors/compressors/base.py    |   6 +-
 src/compressed_tensors/compressors/helpers.py |   2 +-
 .../compressors/model_compressors/__init__.py |  17 ++
 .../model_compressor.py                       |   0
 .../quantized_compressors/__init__.py         |  18 +++
 .../compressors/quantized_compressors/base.py | 146 ++++++++++++++++++
 .../naive_quantized.py                        |  10 +-
 .../pack_quantized.py                         |   2 +-
 .../sparse_compressors/__init__.py            |  18 +++
 .../compressors/sparse_compressors/base.py    | 110 +++++++++++++
 .../{ => sparse_compressors}/dense.py         |   0
 .../sparse_bitmask.py                         |   2 +-
 .../sparse_quantized_compressors/__init__.py  |  16 ++
 .../marlin_24.py                              |   0
 .../model_compressors/__init__.py             |  13 ++
 .../test_model_compressor.py                  |   0
 .../quantized_compressors/__init__.py         |  13 ++
 .../test_fp8_quant.py                         |   0
 .../test_int_quant.py                         |   0
 .../test_pack_quant.py                        |   2 +-
 .../sparse_compressors/__init__.py            |  13 ++
 .../{ => sparse_compressors}/test_bitmask.py  |   0
 .../sparse_quantized_compressors/__init__.py  |  13 ++
 .../test_marlin_24.py                         |   0
 25 files changed, 395 insertions(+), 24 deletions(-)
 create mode 100644 src/compressed_tensors/compressors/model_compressors/__init__.py
 rename src/compressed_tensors/compressors/{ => model_compressors}/model_compressor.py (100%)
 create mode 100644 src/compressed_tensors/compressors/quantized_compressors/__init__.py
 create mode 100644 src/compressed_tensors/compressors/quantized_compressors/base.py
 rename src/compressed_tensors/compressors/{ => quantized_compressors}/naive_quantized.py (93%)
 rename src/compressed_tensors/compressors/{ => quantized_compressors}/pack_quantized.py (99%)
 create mode 100644 src/compressed_tensors/compressors/sparse_compressors/__init__.py
 create mode 100644 src/compressed_tensors/compressors/sparse_compressors/base.py
 rename src/compressed_tensors/compressors/{ => sparse_compressors}/dense.py (100%)
 rename src/compressed_tensors/compressors/{ => sparse_compressors}/sparse_bitmask.py (98%)
 create mode 100644 src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py
 rename src/compressed_tensors/compressors/{ => sparse_quantized_compressors}/marlin_24.py (100%)
 create mode 100644 tests/test_compressors/model_compressors/__init__.py
 rename tests/test_compressors/{ => model_compressors}/test_model_compressor.py (100%)
 create mode 100644 tests/test_compressors/quantized_compressors/__init__.py
 rename tests/test_compressors/{ => quantized_compressors}/test_fp8_quant.py (100%)
 rename tests/test_compressors/{ => quantized_compressors}/test_int_quant.py (100%)
 rename tests/test_compressors/{ => quantized_compressors}/test_pack_quant.py (99%)
 create mode 100644 tests/test_compressors/sparse_compressors/__init__.py
 rename tests/test_compressors/{ => sparse_compressors}/test_bitmask.py (100%)
 create mode 100644 tests/test_compressors/sparse_quantized_compressors/__init__.py
 rename tests/test_compressors/{ => sparse_quantized_compressors}/test_marlin_24.py (100%)

diff --git a/src/compressed_tensors/compressors/__init__.py b/src/compressed_tensors/compressors/__init__.py
index 21b20589..138e3899 100644
--- a/src/compressed_tensors/compressors/__init__.py
+++ b/src/compressed_tensors/compressors/__init__.py
@@ -14,15 +14,9 @@
 
 # flake8: noqa
 
-from .base import BaseCompressor
-from .dense import DenseCompressor
-from .helpers import load_compressed, save_compressed, save_compressed_model
-from .marlin_24 import Marlin24Compressor
-from .model_compressor import ModelCompressor, map_modules_to_quant_args
-from .naive_quantized import (
-    FloatQuantizationCompressor,
-    IntQuantizationCompressor,
-    QuantizationCompressor,
-)
-from .pack_quantized import PackedQuantizationCompressor
-from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
+from .base import *
+from .helpers import *
+from .model_compressors import *
+from .quantized_compressors import *
+from .sparse_compressors import *
+from .sparse_quantized_compressors import *
diff --git a/src/compressed_tensors/compressors/base.py b/src/compressed_tensors/compressors/base.py
index f63cab37..ee751053 100644
--- a/src/compressed_tensors/compressors/base.py
+++ b/src/compressed_tensors/compressors/base.py
@@ -37,18 +37,18 @@ class BaseCompressor(RegistryMixin, ABC):
     Model Load Lifecycle (run_compressed=False):
         - ModelCompressor.decompress()
             - apply_quantization_config()
-            - Compressor.decompress()
+            - BaseCompressor.decompress()
 
     Model Save Lifecycle:
         - ModelCompressor.compress()
-            - Compressor.compress()
+            - BaseCompressor.compress()
 
 
     Module Lifecycle (run_compressed=True):
         - apply_quantization_config()
         - compressed_module = CompressedLinear(module)
             - initialize_module_for_quantization()
-            - Compressor.compression_param_info()
+            - BaseCompressor.compression_param_info()
             - register_parameters()
         - compressed_module.forward()
             -compressed_module.decompress()
diff --git a/src/compressed_tensors/compressors/helpers.py b/src/compressed_tensors/compressors/helpers.py
index 2753621b..7b03a9a1 100644
--- a/src/compressed_tensors/compressors/helpers.py
+++ b/src/compressed_tensors/compressors/helpers.py
@@ -16,7 +16,7 @@
 from typing import Dict, Generator, Optional, Tuple, Union
 
 import torch
-from compressed_tensors.compressors.base import BaseCompressor
+from compressed_tensors.compressors import BaseCompressor
 from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
 from compressed_tensors.utils.safetensors_load import get_weight_mappings
 from safetensors import safe_open
diff --git a/src/compressed_tensors/compressors/model_compressors/__init__.py b/src/compressed_tensors/compressors/model_compressors/__init__.py
new file mode 100644
index 00000000..d9cfa852
--- /dev/null
+++ b/src/compressed_tensors/compressors/model_compressors/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+
+from .model_compressor import *
diff --git a/src/compressed_tensors/compressors/model_compressor.py b/src/compressed_tensors/compressors/model_compressors/model_compressor.py
similarity index 100%
rename from src/compressed_tensors/compressors/model_compressor.py
rename to src/compressed_tensors/compressors/model_compressors/model_compressor.py
diff --git a/src/compressed_tensors/compressors/quantized_compressors/__init__.py b/src/compressed_tensors/compressors/quantized_compressors/__init__.py
new file mode 100644
index 00000000..51e8b8e2
--- /dev/null
+++ b/src/compressed_tensors/compressors/quantized_compressors/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+from .base import *
+from .naive_quantized import *
+from .pack_quantized import *
diff --git a/src/compressed_tensors/compressors/quantized_compressors/base.py b/src/compressed_tensors/compressors/quantized_compressors/base.py
new file mode 100644
index 00000000..67065aa4
--- /dev/null
+++ b/src/compressed_tensors/compressors/quantized_compressors/base.py
@@ -0,0 +1,146 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Dict, Generator, Tuple
+
+import torch
+from compressed_tensors.compressors.base import BaseCompressor
+from compressed_tensors.quantization import QuantizationArgs
+from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from safetensors import safe_open
+from torch import Tensor
+from tqdm import tqdm
+
+
+_LOGGER: logging.Logger = logging.getLogger(__name__)
+
+__all__ = ["BaseQuantizationCompressor"]
+
+
+class BaseQuantizationCompressor(BaseCompressor):
+    """
+    Base class representing a quant compression algorithm. Each child class should
+    implement compression_param_info, compress_weight and decompress_weight.
+
+    Compressors support compressing/decompressing a full module state dict or a single
+    quantized PyTorch leaf module.
+
+    Model Load Lifecycle (run_compressed=False):
+        - ModelCompressor.decompress()
+            - apply_quantization_config()
+            - BaseQuantizationCompressor.decompress()
+                - BaseQuantizationCompressor.decompress_weight()
+
+    Model Save Lifecycle:
+        - ModelCompressor.compress()
+            - BaseQuantizationCompressor.compress()
+                - BaseQuantizationCompressor.compress_weight()
+
+    Module Lifecycle (run_compressed=True):
+        - apply_quantization_config()
+        - compressed_module = CompressedLinear(module)
+            - initialize_module_for_quantization()
+            - BaseQuantizationCompressor.compression_param_info()
+            - register_parameters()
+        - compressed_module.forward()
+            - compressed_module.decompress()
+
+
+    :param config: config specifying compression parameters
+    """
+
+    def compress(
+        self,
+        model_state: Dict[str, Tensor],
+        names_to_scheme: Dict[str, QuantizationArgs],
+        **kwargs,
+    ) -> Dict[str, Tensor]:
+        """
+        Compresses a dense state dict
+
+        :param model_state: state dict of uncompressed model
+        :param names_to_scheme: quantization args for each quantized weight, needed for
+            quantize function to calculate bit depth
+        :return: compressed state dict
+        """
+        compressed_dict = {}
+        weight_suffix = ".weight"
+        _LOGGER.debug(
+            f"Compressing model with {len(model_state)} parameterized layers..."
+        )
+
+        for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
+            if name.endswith(weight_suffix):
+                prefix = name[: -(len(weight_suffix))]
+                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
+                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
+                g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
+                if scale is not None:
+                    # weight is quantized, compress it
+                    quant_args = names_to_scheme[prefix]
+                    compressed_data = self.compress_weight(
+                        weight=value,
+                        scale=scale,
+                        zero_point=zp,
+                        g_idx=g_idx,
+                        quantization_args=quant_args,
+                        device="cpu",
+                    )
+                    for key, value in compressed_data.items():
+                        compressed_dict[merge_names(prefix, key)] = value
+                else:
+                    compressed_dict[name] = value.to("cpu")
+            elif name.endswith("zero_point") and torch.all(value == 0):
+                continue
+            elif name.endswith("g_idx") and torch.any(value <= -1):
+                continue
+            else:
+                compressed_dict[name] = value.to("cpu")
+
+        return compressed_dict
+
+    def decompress(
+        self,
+        path_to_model_or_tensors: str,
+        names_to_scheme: Dict[str, QuantizationArgs],
+        device: str = "cpu",
+    ) -> Generator[Tuple[str, Tensor], None, None]:
+        """
+        Reads a compressed state dict located at path_to_model_or_tensors
+        and returns a generator for sequentially decompressing back to a
+        dense state dict
+
+        :param path_to_model_or_tensors: path to compressed safetensors model (directory
+            with one or more safetensors files) or compressed tensors file
+        :param names_to_scheme: quantization args for each quantized weight
+        :param device: optional device to load intermediate weights into
+        :return: generator yielding (weight name, decompressed weight) tuples
+        """
+        weight_mappings = get_nested_weight_mappings(
+            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+        )
+        for weight_name in weight_mappings.keys():
+            weight_data = {}
+            for param_name, safe_path in weight_mappings[weight_name].items():
+                full_name = merge_names(weight_name, param_name)
+                with safe_open(safe_path, framework="pt", device=device) as f:
+                    weight_data[param_name] = f.get_tensor(full_name)
+
+            if "weight_scale" in weight_data:
+                quant_args = names_to_scheme[weight_name]
+                decompressed = self.decompress_weight(
+                    compressed_data=weight_data, quantization_args=quant_args
+                )
+                yield merge_names(weight_name, "weight"), decompressed
diff --git a/src/compressed_tensors/compressors/naive_quantized.py b/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
similarity index 93%
rename from src/compressed_tensors/compressors/naive_quantized.py
rename to src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
index acc09932..0267aca4 100644
--- a/src/compressed_tensors/compressors/naive_quantized.py
+++ b/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
@@ -16,7 +16,7 @@
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.compressors.base_quantization_compressor import (
+from compressed_tensors.compressors.quantized_compressors.base import (
     BaseQuantizationCompressor,
 )
 from compressed_tensors.config import CompressionFormat
@@ -27,14 +27,14 @@
 
 
 __all__ = [
-    "QuantizationCompressor",
+    "NaiveQuantizationCompressor",
     "IntQuantizationCompressor",
     "FloatQuantizationCompressor",
 ]
 
 
 @BaseCompressor.register(name=CompressionFormat.naive_quantized.value)
-class QuantizationCompressor(BaseQuantizationCompressor):
+class NaiveQuantizationCompressor(BaseQuantizationCompressor):
     """
     Implements naive compression for quantized models. Weight of each
     quantized layer is converted from its original float type to the closest Pytorch
@@ -123,7 +123,7 @@ def decompress_weight(
 
 
 @BaseCompressor.register(name=CompressionFormat.int_quantized.value)
-class IntQuantizationCompressor(QuantizationCompressor):
+class IntQuantizationCompressor(NaiveQuantizationCompressor):
     """
     Alias for integer quantized models
     """
@@ -132,7 +132,7 @@ class IntQuantizationCompressor(QuantizationCompressor):
 
 
 @BaseCompressor.register(name=CompressionFormat.float_quantized.value)
-class FloatQuantizationCompressor(QuantizationCompressor):
+class FloatQuantizationCompressor(NaiveQuantizationCompressor):
     """
     Alias for fp quantized models
     """
diff --git a/src/compressed_tensors/compressors/pack_quantized.py b/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
similarity index 99%
rename from src/compressed_tensors/compressors/pack_quantized.py
rename to src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
index 9d63e264..ce9f0a57 100644
--- a/src/compressed_tensors/compressors/pack_quantized.py
+++ b/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
@@ -17,7 +17,7 @@
 import numpy as np
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.compressors.base_quantization_compressor import (
+from compressed_tensors.compressors.quantized_compressors.base import (
     BaseQuantizationCompressor,
 )
 from compressed_tensors.config import CompressionFormat
diff --git a/src/compressed_tensors/compressors/sparse_compressors/__init__.py b/src/compressed_tensors/compressors/sparse_compressors/__init__.py
new file mode 100644
index 00000000..de4fd887
--- /dev/null
+++ b/src/compressed_tensors/compressors/sparse_compressors/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+from .base import *
+from .dense import *
+from .sparse_bitmask import *
diff --git a/src/compressed_tensors/compressors/sparse_compressors/base.py b/src/compressed_tensors/compressors/sparse_compressors/base.py
new file mode 100644
index 00000000..308ddab2
--- /dev/null
+++ b/src/compressed_tensors/compressors/sparse_compressors/base.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Dict, Generator, Tuple
+
+from compressed_tensors.compressors.base import BaseCompressor
+from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from safetensors import safe_open
+from torch import Tensor
+from tqdm import tqdm
+
+
+__all__ = ["BaseSparseCompressor"]
+
+_LOGGER: logging.Logger = logging.getLogger(__name__)
+
+
+class BaseSparseCompressor(BaseCompressor):
+    """
+    Base class representing a sparse compression algorithm. Each child class should
+    implement compression_param_info, compress_weight and decompress_weight.
+
+    Compressors support compressing/decompressing a full module state dict or a single
+    quantized PyTorch leaf module.
+
+    Model Load Lifecycle (run_compressed=False):
+        - ModelCompressor.decompress()
+            - apply_quantization_config()
+            - BaseSparseCompressor.decompress()
+                - BaseSparseCompressor.decompress_weight()
+
+    Model Save Lifecycle:
+        - ModelCompressor.compress()
+            - BaseSparseCompressor.compress()
+                - BaseSparseCompressor.compress_weight()
+
+    Module Lifecycle (run_compressed=True):
+        - apply_quantization_config()
+        - compressed_module = CompressedLinear(module)
+            - initialize_module_for_quantization()
+            - BaseSparseCompressor.compression_param_info()
+            - register_parameters()
+        - compressed_module.forward()
+            - compressed_module.decompress()
+
+
+    :param config: config specifying compression parameters
+    """
+
+    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        """
+        Compresses a dense state dict via this compressor's compress_weight
+
+        :param model_state: state dict of uncompressed model
+        :return: compressed state dict
+        """
+        compressed_dict = {}
+        _LOGGER.debug(
+            f"Compressing model with {len(model_state)} parameterized layers..."
+        )
+        for name, value in tqdm(model_state.items(), desc="Compressing model"):
+            compression_data = self.compress_weight(name, value)
+            for key in compression_data.keys():
+                if key in compressed_dict:
+                    _LOGGER.warn(
+                        f"Expected all compressed state_dict keys to be unique, but "
+                        f"found an existing entry for {key}. The existing entry will "
+                        "be replaced."
+                    )
+
+            compressed_dict.update(compression_data)
+
+        return compressed_dict
+
+    def decompress(
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
+    ) -> Generator[Tuple[str, Tensor], None, None]:
+        """
+        Reads a compressed state dict located
+        at path_to_model_or_tensors and returns a generator
+        for sequentially decompressing back to a dense state dict
+
+        :param path_to_model_or_tensors: path to compressed safetensors model (directory
+            with one or more safetensors files) or compressed tensors file
+        :param device: device to load decompressed weights onto
+        :return: iterator for generating decompressed weights
+        """
+        weight_mappings = get_nested_weight_mappings(
+            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+        )
+        for weight_name in weight_mappings.keys():
+            weight_data = {}
+            for param_name, safe_path in weight_mappings[weight_name].items():
+                full_name = merge_names(weight_name, param_name)
+                with safe_open(safe_path, framework="pt", device=device) as f:
+                    weight_data[param_name] = f.get_tensor(full_name)
+            decompressed = self.decompress_weight(weight_data)
+            yield weight_name, decompressed
diff --git a/src/compressed_tensors/compressors/dense.py b/src/compressed_tensors/compressors/sparse_compressors/dense.py
similarity index 100%
rename from src/compressed_tensors/compressors/dense.py
rename to src/compressed_tensors/compressors/sparse_compressors/dense.py
diff --git a/src/compressed_tensors/compressors/sparse_bitmask.py b/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
similarity index 98%
rename from src/compressed_tensors/compressors/sparse_bitmask.py
rename to src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
index 63124163..a950aa64 100644
--- a/src/compressed_tensors/compressors/sparse_bitmask.py
+++ b/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
@@ -17,7 +17,7 @@
 import numpy
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.compressors.base_sparsity_compressor import BaseSparseCompressor
+from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
 from compressed_tensors.config import CompressionFormat
 from compressed_tensors.utils import merge_names
 from torch import Tensor
diff --git a/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py b/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py
new file mode 100644
index 00000000..c3615f06
--- /dev/null
+++ b/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+from .marlin_24 import Marlin24Compressor
diff --git a/src/compressed_tensors/compressors/marlin_24.py b/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
similarity index 100%
rename from src/compressed_tensors/compressors/marlin_24.py
rename to src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
diff --git a/tests/test_compressors/model_compressors/__init__.py b/tests/test_compressors/model_compressors/__init__.py
new file mode 100644
index 00000000..0c44f887
--- /dev/null
+++ b/tests/test_compressors/model_compressors/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/test_compressors/test_model_compressor.py b/tests/test_compressors/model_compressors/test_model_compressor.py
similarity index 100%
rename from tests/test_compressors/test_model_compressor.py
rename to tests/test_compressors/model_compressors/test_model_compressor.py
diff --git a/tests/test_compressors/quantized_compressors/__init__.py b/tests/test_compressors/quantized_compressors/__init__.py
new file mode 100644
index 00000000..0c44f887
--- /dev/null
+++ b/tests/test_compressors/quantized_compressors/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/test_compressors/test_fp8_quant.py b/tests/test_compressors/quantized_compressors/test_fp8_quant.py
similarity index 100%
rename from tests/test_compressors/test_fp8_quant.py
rename to tests/test_compressors/quantized_compressors/test_fp8_quant.py
diff --git a/tests/test_compressors/test_int_quant.py b/tests/test_compressors/quantized_compressors/test_int_quant.py
similarity index 100%
rename from tests/test_compressors/test_int_quant.py
rename to tests/test_compressors/quantized_compressors/test_int_quant.py
diff --git a/tests/test_compressors/test_pack_quant.py b/tests/test_compressors/quantized_compressors/test_pack_quant.py
similarity index 99%
rename from tests/test_compressors/test_pack_quant.py
rename to tests/test_compressors/quantized_compressors/test_pack_quant.py
index bef8adc3..496e8304 100644
--- a/tests/test_compressors/test_pack_quant.py
+++ b/tests/test_compressors/quantized_compressors/test_pack_quant.py
@@ -20,7 +20,7 @@
 import pytest
 import torch
 from compressed_tensors import PackedQuantizationCompressor
-from compressed_tensors.compressors.pack_quantized import (
+from compressed_tensors.compressors.quantized_compressors.pack_quantized import (
     pack_to_int32,
     unpack_from_int32,
 )
diff --git a/tests/test_compressors/sparse_compressors/__init__.py b/tests/test_compressors/sparse_compressors/__init__.py
new file mode 100644
index 00000000..0c44f887
--- /dev/null
+++ b/tests/test_compressors/sparse_compressors/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/test_compressors/test_bitmask.py b/tests/test_compressors/sparse_compressors/test_bitmask.py
similarity index 100%
rename from tests/test_compressors/test_bitmask.py
rename to tests/test_compressors/sparse_compressors/test_bitmask.py
diff --git a/tests/test_compressors/sparse_quantized_compressors/__init__.py b/tests/test_compressors/sparse_quantized_compressors/__init__.py
new file mode 100644
index 00000000..0c44f887
--- /dev/null
+++ b/tests/test_compressors/sparse_quantized_compressors/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/test_compressors/test_marlin_24.py b/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py
similarity index 100%
rename from tests/test_compressors/test_marlin_24.py
rename to tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py