diff --git a/src/compressed_tensors/compressors/dense.py b/src/compressed_tensors/compressors/dense.py
index 8f09c8bf..16707acd 100644
--- a/src/compressed_tensors/compressors/dense.py
+++ b/src/compressed_tensors/compressors/dense.py
@@ -29,6 +29,6 @@ def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor
         return model_state
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
diff --git a/src/compressed_tensors/compressors/marlin_24.py b/src/compressed_tensors/compressors/marlin_24.py
index 1abf0a75..50a3339e 100644
--- a/src/compressed_tensors/compressors/marlin_24.py
+++ b/src/compressed_tensors/compressors/marlin_24.py
@@ -175,7 +175,7 @@ def compress(
         return compressed_dict
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         raise NotImplementedError(
             "Decompression is not implemented for the Marlin24 Compressor."
diff --git a/src/compressed_tensors/compressors/naive_quantized.py b/src/compressed_tensors/compressors/naive_quantized.py
index f54d78c4..5d46a2e9 100644
--- a/src/compressed_tensors/compressors/naive_quantized.py
+++ b/src/compressed_tensors/compressors/naive_quantized.py
@@ -93,7 +93,7 @@ def compress(
         return compressed_dict
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
diff --git a/src/compressed_tensors/compressors/sparse_bitmask.py b/src/compressed_tensors/compressors/sparse_bitmask.py
index c0cc1888..796d2643 100644
--- a/src/compressed_tensors/compressors/sparse_bitmask.py
+++ b/src/compressed_tensors/compressors/sparse_bitmask.py
@@ -72,7 +72,7 @@ def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
         return compressed_dict
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a bitmask compressed state dict located
diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py
index fc238e83..f1a6de06 100644
--- a/src/compressed_tensors/quantization/quant_scheme.py
+++ b/src/compressed_tensors/quantization/quant_scheme.py
@@ -17,7 +17,6 @@
 
 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
-    QuantizationStrategy,
     QuantizationType,
)
 from pydantic import BaseModel
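
A minimal sketch of why every `decompress` override gains `**kwargs`: a single polymorphic call site can pass extra options to the compressors that need them while the others silently absorb the unused arguments. The base class, call site, and the `names_to_scheme` keyword below are illustrative assumptions, not code from this diff; `Tensor` is simplified to `float` so the sketch runs without torch.

```python
from typing import Generator, Tuple

Tensor = float  # stand-in for torch.Tensor, illustration only


class DenseCompressor:
    # Accepting **kwargs means callers can pass options intended for
    # other compressor types without raising a TypeError here.
    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        return iter([])


def load_weights(compressor, path: str, **extra) -> None:
    # One shared call site: extras such as names_to_scheme (hypothetical
    # keyword) flow through to implementations that consume them and are
    # ignored by implementations, like DenseCompressor, that do not.
    for name, value in compressor.decompress(path, device="cpu", **extra):
        print(name, value)


load_weights(DenseCompressor(), "model.safetensors", names_to_scheme={})
```

Without the `**kwargs` catch-all in each subclass signature, the shared call site would have to special-case which keyword arguments each compressor accepts.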