better engineering (pytorch#695)
* better engineering

* cleanup
mikekgfb authored and malfet committed Jul 17, 2024
1 parent bd62748 commit 46291a9
Showing 3 changed files with 2 additions and 28 deletions.
build/utils.py (2 additions, 1 deletion)
@@ -237,11 +237,12 @@ def is_mps_available() -> bool:
 
 def get_device_str(device) -> str:
     if isinstance(device, str) and device == "fast":
-        return (
+        device = (
             "cuda"
             if torch.cuda.is_available()
             else "mps" if is_mps_available() else "cpu"
         )
+        return device
     else:
         return str(device)

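For context, a minimal usage sketch of the refactored helper (illustrative only, not part of the commit). The "fast" alias resolves to the best available backend at call time, so callers need not probe hardware themselves:

    # Illustrative usage: "fast" picks cuda, then mps, then cpu.
    import torch

    from build.utils import get_device_str

    device = get_device_str("fast")  # e.g. "cuda" on a GPU machine
    model = torch.nn.Linear(8, 8).to(device)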
qops.py (0 additions, 2 deletions)
@@ -6,8 +6,6 @@
 
 from build.utils import find_multiple, get_precision, use_et_backend
 
-# from torch.nn.parameter import Parameter
-
 
 def linear_int8_aoti(input, weight, scales):
     n_groups = scales.numel() // scales.shape[0]
quantize.py (0 additions, 25 deletions)
@@ -82,31 +82,6 @@ def quantized_model(self) -> nn.Module:
         return self.model_
 
 
-#########################################################################
-###     QuantHandler wrapper for a8w4dq from torchao                  ###
-#
-#
-# class Int8DynActInt4WeightQuantizer(QuantHandler):
-#     def __init__(self, model: nn.Module, device="cpu", tokenizer=None, **kwargs):
-#         import torchao.quantization.quant_api as quant_api
-#
-#         self.model_ = model
-#         self.device = device
-#         self.tokenizer = tokenizer
-#         self.quantizer = quant_api.Int8DynActInt4WeightQuantizer(
-#             **kwargs, precision=get_precision(), scales_precision=get_precision()
-#         )
-#
-#     def create_quantized_state_dict(self) -> Dict:  # "StateDict"
-#         pass
-#
-#     def convert_for_runtime(self) -> nn.Module:
-#         pass
-#
-#     def quantized_model(self) -> nn.Module:
-#         return self.quantizer.quantize(self.model_)
-#
-#
 #########################################################################
 ###         wrapper for setting precision as a QuantHandler          ###
 
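The block removed above was already commented out, so this hunk is dead-code cleanup rather than a behavior change. For reference, a sketch of the equivalent direct torchao call, reconstructed only from the deleted wrapper (treat the API surface as an assumption; it depends on the installed torchao version):

    # Sketch from the removed wrapper: a8w4dq quantization
    # (int8 dynamic activations, int4 weights) via torchao directly.
    import torch.nn as nn
    import torchao.quantization.quant_api as quant_api

    from build.utils import get_precision

    def quantize_a8w4dq(model: nn.Module, **kwargs) -> nn.Module:
        quantizer = quant_api.Int8DynActInt4WeightQuantizer(
            **kwargs, precision=get_precision(), scales_precision=get_precision()
        )
        return quantizer.quantize(model)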
