[AWQ] e2e awq-quantized model #1229

Merged 1 commit on Nov 10, 2023
6 changes: 3 additions & 3 deletions python/mlc_chat/cli/convert_weight.py
@@ -31,8 +31,8 @@ def _parse_source(path: Union[str, Path], config_path: Path) -> Path:
     if path == "auto":
         return config_path.parent
     path = Path(path)
-    if not path.is_dir():
-        raise argparse.ArgumentTypeError(f"Directory does not exist: {path}")
+    if not path.exists():
+        raise argparse.ArgumentTypeError(f"Model source does not exist: {path}")
     return path

 def _parse_output(path: Union[str, Path]) -> Path:
@@ -60,7 +60,7 @@ def _parse_output(path: Union[str, Path]) -> Path:
     parser.add_argument(
         "--source-format",
         type=str,
-        choices=["auto", "huggingface-torch", "huggingface-safetensor"],
+        choices=["auto", "huggingface-torch", "huggingface-safetensor", "awq"],
         default="auto",
         help="The format of source model weight, infer from `config` if missing. "
         "(default: %(default)s)",
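The relaxed existence check matters because an AWQ checkpoint is typically a single weight file rather than a model directory, so _parse_source now accepts both. A minimal sketch of the new behaviour, assuming the module import path matches this file (the paths and file name are hypothetical):

    # Hypothetical paths; _parse_source is the helper patched above (import path assumed).
    from pathlib import Path
    from mlc_chat.cli.convert_weight import _parse_source

    config = Path("./Llama-2-7B-AWQ/config.json")
    _parse_source("auto", config)                                    # -> ./Llama-2-7B-AWQ
    _parse_source("./Llama-2-7B-AWQ/awq_model_w4_g128.pt", config)   # single file: accepted if it exists
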
1 change: 1 addition & 0 deletions python/mlc_chat/compiler/loader/loader.py
@@ -8,4 +8,5 @@
 LOADER: Dict[str, Any] = {
     "huggingface-torch": HuggingFaceLoader,
     "huggingface-safetensor": HuggingFaceLoader,
+    "awq": HuggingFaceLoader,
 }
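AWQ checkpoints therefore reuse the existing HuggingFaceLoader rather than getting a dedicated loader; the new key only changes the dispatch. A minimal sketch of that lookup (the import path is assumed to match the file in this diff):

    # Assumed import path; LOADER is the dispatch table extended above.
    from mlc_chat.compiler.loader.loader import LOADER

    loader_cls = LOADER["awq"]
    assert loader_cls is LOADER["huggingface-torch"]  # same HuggingFaceLoader class
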
1 change: 1 addition & 0 deletions python/mlc_chat/compiler/model/model.py
@@ -62,6 +62,7 @@ class Model:
         },
         quantize={
             "group-quant": llama_quantization.group_quant,
+            "awq": llama_quantization.awq_quant,
         },
     )
 }
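On the model side, the Llama entry now exposes an "awq" quantization scheme next to "group-quant". A minimal sketch of how that registration could be looked up (the MODELS registry name and import path are assumptions; only the mapping "awq" -> llama_quantization.awq_quant is from this diff):

    # Assumed names for the registry and import path.
    from mlc_chat.compiler.model.model import MODELS

    awq_scheme = MODELS["llama"].quantize["awq"]
    group_scheme = MODELS["llama"].quantize["group-quant"]
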
6 changes: 3 additions & 3 deletions python/mlc_chat/compiler/quantization/awq_quantization.py
@@ -159,7 +159,7 @@ def _dequantize(
                 tir.subtract(float_weight[i, j], float_zeros[i, j // self.group_size]),
                 scale[i, j // self.group_size],
             ),
-            name="decode",
+            name="dequantize",
         )


@@ -250,7 +250,7 @@ def forward(self, x: nn.Tensor) -> nn.Tensor:  # pylint: disable=invalid-name
                 scale,
                 [tir.IntImm("int64", self.out_features), tir.IntImm("int64", self.in_features)],
             ),
-            name_hint="decode",
+            name_hint="dequantize",
             args=[self.qweight, self.qzeros, self.scales],
         )
         w = nn.op.permute_dims(w)  # pylint: disable=invalid-name
@@ -356,7 +356,7 @@ def forward(self, x: nn.Tensor) -> Sequence[nn.Tensor]:  # pylint: disable=invalid-name
                     tir.IntImm("int64", self.in_features),
                 ],
             ),
-            name_hint="decode",
+            name_hint="dequantize",
             args=[self.qweight, self.qzeros, self.scales],
         )
         w = nn.op.permute_dims(w)  # pylint: disable=invalid-name
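The renamed kernels all compute the group-wise dequantization visible in the first hunk: w[i, j] = (q[i, j] - z[i, j // group_size]) * s[i, j // group_size], with one zero point and scale per group along the input dimension. A minimal NumPy sketch of that arithmetic with toy shapes (the real kernel also unpacks 4-bit values from packed int32 storage, which is omitted here):

    import numpy as np

    group_size = 128
    out_features, in_features = 4, 256  # toy shapes, not a real model's

    float_weight = np.random.randint(0, 16, size=(out_features, in_features)).astype("float16")
    float_zeros = np.random.randint(0, 16, size=(out_features, in_features // group_size)).astype("float16")
    scale = np.random.rand(out_features, in_features // group_size).astype("float16")

    # One (zero, scale) pair per group of `group_size` input channels.
    group = np.arange(in_features) // group_size
    w = (float_weight - float_zeros[:, group]) * scale[:, group]
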
6 changes: 4 additions & 2 deletions python/mlc_chat/support/auto_weight.py
@@ -84,6 +84,8 @@ def detect_weight(
         weight_config_path = check_func(weight_path)
         if not weight_config_path:
             raise ValueError(f"The weight is not in {weight_format} format.")
+    else:
+        weight_config_path = weight_path
     return weight_config_path, weight_format


@@ -143,5 +145,5 @@ def _check_safetensor(weight_path: Path) -> Optional[Path]:
     "huggingface-safetensor": _check_safetensor,
 }

-# "awq", "ggml", "gguf" are not supported yet.
-AVAILABLE_WEIGHT_FORMAT = ["huggingface-torch", "huggingface-safetensor"]
+# "ggml", "gguf" are not supported yet.
+AVAILABLE_WEIGHT_FORMAT = ["huggingface-torch", "huggingface-safetensor", "awq"]
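The added else branch means a format without an on-disk checker, which now includes "awq", simply passes the user-supplied path through instead of failing detection. An illustrative, simplified reconstruction of that control flow (only the else branch and the error message are taken verbatim from this diff; the surrounding condition and the helper name are assumptions):

    from pathlib import Path
    from typing import Callable, Dict, Optional, Tuple

    def _resolve_weight_path(
        weight_path: Path,
        weight_format: str,
        check_funcs: Dict[str, Callable[[Path], Optional[Path]]],
    ) -> Tuple[Path, str]:
        check_func = check_funcs.get(weight_format)
        if check_func is not None:
            # Formats with a checker ("huggingface-torch", "huggingface-safetensor") must validate.
            weight_config_path = check_func(weight_path)
            if not weight_config_path:
                raise ValueError(f"The weight is not in {weight_format} format.")
        else:
            # Checker-less formats such as "awq" fall through unchanged.
            weight_config_path = weight_path
        return weight_config_path, weight_format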