[Neural Speed] Improvements to run.py script (#87)
aahouzi authored Feb 21, 2024
1 parent 0470b1f commit 33ffaf0
Showing 2 changed files with 30 additions and 6 deletions.
scripts/convert.py: 13 additions & 1 deletion

@@ -13,8 +13,10 @@
 # limitations under the License.
 
 import argparse
+import sys
 from pathlib import Path
 from typing import List, Optional
+from huggingface_hub import snapshot_download
 from neural_speed.convert import convert_model
 
 def main(args_in: Optional[List[str]] = None) -> None:
@@ -25,6 +27,11 @@ def main(args_in: Optional[List[str]] = None) -> None:
         help="output format, default: f32",
         default="f32",
     )
+    parser.add_argument(
+        "--token",
+        type=str,
+        help="Access token ID for models that require it (LLaMa2, etc.)",
+    )
     parser.add_argument("--outfile", type=Path, required=True, help="path to write to")
     parser.add_argument("model", type=Path, help="directory containing model file or model id")
     parser.add_argument("--use_quantized_model", action="store_true", help="use quantized model: awq/gptq/autoround")
@@ -33,7 +40,12 @@ def main(args_in: Optional[List[str]] = None) -> None:
     if args.model.exists():
         dir_model = args.model.as_posix()
     else:
-        dir_model = args.model
+        try:
+            dir_model = snapshot_download(repo_id=str(args.model), resume_download=True, token=args.token)
+        except Exception as e:
+            # Only HTTP errors carry a response attribute, so guard the access
+            if getattr(e, "response", None) is not None and e.response.status_code == 401:
+                print("You are required to input an access token ID for {}, please add it in option --token or download model weights locally".format(args.model))
+            sys.exit(f"{e}")
 
     convert_model(dir_model, args.outfile, args.outtype, use_quantized_model=args.use_quantized_model)

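Aside: the broad except Exception above only works because the getattr guard keeps non-HTTP failures from raising an AttributeError. A more explicit variant would lean on huggingface_hub's typed errors instead of inspecting status codes. A minimal sketch under that assumption (the resolve_model helper and its messages are illustrative, not part of this commit):

    import sys
    from huggingface_hub import snapshot_download
    from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

    def resolve_model(model: str, token=None) -> str:
        """Return a local snapshot path for a Hub repo id, or exit with a hint."""
        try:
            return snapshot_download(repo_id=model, token=token)
        # GatedRepoError subclasses RepositoryNotFoundError, so catch it first
        except GatedRepoError as e:
            sys.exit(f"Access token required for {model}; pass --token or download the weights locally. ({e})")
        except RepositoryNotFoundError as e:
            sys.exit(f"Model id {model} was not found on the Hugging Face Hub. ({e})")

The Hub client already maps 401/403 responses on gated repos to GatedRepoError, so no status-code check is needed.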
scripts/run.py: 17 additions & 5 deletions

@@ -18,6 +18,7 @@
 from typing import List, Optional
 from transformers import AutoConfig
 import subprocess
+from huggingface_hub import snapshot_download
 
 model_maps = {"gpt_neox": "gptneox", "gpt_bigcode": "starcoder"}
 build_path = Path(Path(__file__).parent.absolute(), "../build/")
@@ -146,13 +147,24 @@ def main(args_in: Optional[List[str]] = None) -> None:
         action="store_true",
         help="Use ring-buffer and thus do not re-compute after reaching ctx_size (default: False)",
     )
+    parser.add_argument(
+        "--token",
+        type=str,
+        help="Access token ID for models that require it (LLaMa2, etc.)",
+    )
 
     args = parser.parse_args(args_in)
 
     if args.model.exists():
         dir_model = args.model.as_posix()
     else:
-        dir_model = args.model
+        try:
+            dir_model = snapshot_download(repo_id=str(args.model), resume_download=True, token=args.token)
+        # Handles missing token ID for gated models
+        except Exception as e:
+            if getattr(e, "response", None) is not None and e.response.status_code == 401:
+                print("You are required to input an access token ID for {}, please add it in option --token or download model weights locally".format(args.model))
+            sys.exit(f"{e}")
 
     parent_path = Path(__file__).parent.absolute()
     config = AutoConfig.from_pretrained(dir_model)
@@ -166,8 +178,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
     convert_cmd = ["python", path]
     convert_cmd.extend(["--outfile", Path(work_path, "ne_{}_f32.bin".format(model_type))])
     convert_cmd.extend(["--outtype", "f32"])
-    convert_cmd.append(args.model)
-    print("convert model ...")
+    convert_cmd.append(dir_model)
+    print("Convert model ...")
     subprocess.run(convert_cmd)
 
     # 2. quantize
@@ -186,7 +198,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     quant_cmd.extend(["--use_ggml"])
     quant_cmd.extend(["--build_dir", args.build_dir])
     quant_cmd.extend(["--one_click_run", "True"])
-    print("quantize model ...")
+    print("Quantize model ...")
     subprocess.run(quant_cmd)
 
     # 3. inference
@@ -208,7 +220,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         infer_cmd.extend(["--shift-roped-k"])
     if (model_type == "baichuan" or model_type == "qwen"):
         infer_cmd.extend(["--tokenizer", dir_model])
-    print("inferce model ...")
+    print("Inference model ...")
     subprocess.run(infer_cmd)


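With these changes a gated checkpoint can be fetched and run in one step. A hypothetical invocation of run.py's main(), assuming the model id is a positional argument as it is in convert.py; the repo id and token value are placeholders:

    # Resolves the Hub id to a local snapshot via --token, then converts,
    # quantizes, and runs inference on that local copy (dir_model).
    main(["--token", "hf_xxx", "meta-llama/Llama-2-7b-hf"])

Passing dir_model rather than args.model to the convert subprocess is what keeps the Hub id from being resolved a second time once the snapshot is already on disk.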
