casper-hansen · stsfaroz · Jun 2, 2024
diff --git a/awq/modules/fused/mlp.py b/awq/modules/fused/mlp.py
@@ -2,13 +2,14 @@
 import torch.nn.functional as F
 from awq.modules.linear.gemm import WQLinear_GEMM
 from awq.modules.linear.gemv import WQLinear_GEMV
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 class QuantFusedMLP(nn.Module):

diff --git a/awq/modules/fused/moe.py b/awq/modules/fused/moe.py
@@ -1,12 +1,13 @@
 import torch
 from typing import Dict
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 class FusedSparseMoeBlock(torch.nn.Module):

diff --git a/awq/modules/fused/norm.py b/awq/modules/fused/norm.py
@@ -1,13 +1,14 @@
 import torch
 from torch import nn
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 class FasterTransformerRMSNorm(nn.Module):
     def __init__(self, weight, eps=1e-6):

diff --git a/awq/modules/linear/gemm.py b/awq/modules/linear/gemm.py
@@ -3,13 +3,14 @@
 from torch.autograd import Function
 from awq.utils.utils import get_best_device
 from awq.utils.packing_utils import dequantize_gemm
+import warnings
 
 try:
-    import awq_ext  # with CUDA kernels (AutoAWQ_kernels)
-
+    import awq_ext  # with CUDA kernels
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 # Adapted from https://github.com/compressa-ai/AutoAWQ/tree/dev

diff --git a/awq/modules/linear/gemv.py b/awq/modules/linear/gemv.py
@@ -1,13 +1,14 @@
 import torch
 import torch.nn as nn
+import warnings
 
 try:
     import awq_ext  # with CUDA kernels
-
     AWQ_INSTALLED = True
-except:
+except Exception as e:
     AWQ_INSTALLED = False
+    warnings.warn(f"AWQ extension could not be imported. Error: {e}")
 
 
 def make_divisible(c, divisor):
     return (c + divisor - 1) // divisor