[Neural Speed] Improvements to run.py script (#87)
aahouzi authored Feb 21, 2024
1 parent 0470b1f commit 33ffaf0
Showing 2 changed files with 30 additions and 6 deletions.
scripts/convert.py: 13 additions & 1 deletion

@@ -13,8 +13,10 @@
 # limitations under the License.
 
 import argparse
+import sys
 from pathlib import Path
 from typing import List, Optional
+from huggingface_hub import snapshot_download
 from neural_speed.convert import convert_model
 
 def main(args_in: Optional[List[str]] = None) -> None:
@@ -25,6 +27,11 @@ def main(args_in: Optional[List[str]] = None) -> None:
         help="output format, default: f32",
         default="f32",
     )
+    parser.add_argument(
+        "--token",
+        type=str,
+        help="Access token ID for models that require it (LLaMa2, etc.)",
+    )
     parser.add_argument("--outfile", type=Path, required=True, help="path to write to")
     parser.add_argument("model", type=Path, help="directory containing model file or model id")
     parser.add_argument("--use_quantized_model", action="store_true", help="use quantized model: awq/gptq/autoround")
@@ -33,7 +40,12 @@ def main(args_in: Optional[List[str]] = None) -> None:
     if args.model.exists():
         dir_model = args.model.as_posix()
     else:
-        dir_model = args.model
+        try:
+            dir_model = snapshot_download(repo_id=str(args.model), resume_download=True, token=args.token)
+        except Exception as e:
+            # Only HTTP errors carry a response attribute, so guard the access
+            if getattr(e, "response", None) is not None and e.response.status_code == 401:
+                print("You are required to input an access token ID for {}, please add it in option --token or download model weights locally".format(args.model))
+            sys.exit(f"{e}")
 
     convert_model(dir_model, args.outfile, args.outtype, use_quantized_model=args.use_quantized_model)

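Aside: the broad except Exception above only works because the getattr guard keeps non-HTTP failures from raising an AttributeError. A more explicit variant would lean on huggingface_hub's typed errors instead of inspecting status codes. A minimal sketch under that assumption (the resolve_model helper and its messages are illustrative, not part of this commit):

    import sys
    from huggingface_hub import snapshot_download
    from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

    def resolve_model(model: str, token=None) -> str:
        """Return a local snapshot path for a Hub repo id, or exit with a hint."""
        try:
            return snapshot_download(repo_id=model, token=token)
        # GatedRepoError subclasses RepositoryNotFoundError, so catch it first
        except GatedRepoError as e:
            sys.exit(f"Access token required for {model}; pass --token or download the weights locally. ({e})")
        except RepositoryNotFoundError as e:
            sys.exit(f"Model id {model} was not found on the Hugging Face Hub. ({e})")

The Hub client already maps 401/403 responses on gated repos to GatedRepoError, so no status-code check is needed.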
scripts/run.py: 17 additions & 5 deletions

@@ -18,6 +18,7 @@
 from typing import List, Optional
 from transformers import AutoConfig
 import subprocess
+from huggingface_hub import snapshot_download
 
 model_maps = {"gpt_neox": "gptneox", "gpt_bigcode": "starcoder"}
 build_path = Path(Path(__file__).parent.absolute(), "../build/")
@@ -146,13 +147,24 @@ def main(args_in: Optional[List[str]] = None) -> None:
         action="store_true",
         help="Use ring-buffer and thus do not re-compute after reaching ctx_size (default: False)",
     )
+    parser.add_argument(
+        "--token",
+        type=str,
+        help="Access token ID for models that require it (LLaMa2, etc.)",
+    )
 
     args = parser.parse_args(args_in)
 
     if args.model.exists():
         dir_model = args.model.as_posix()
     else:
-        dir_model = args.model
+        try:
+            dir_model = snapshot_download(repo_id=str(args.model), resume_download=True, token=args.token)
+        # Handles missing token ID for gated models
+        except Exception as e:
+            if getattr(e, "response", None) is not None and e.response.status_code == 401:
+                print("You are required to input an access token ID for {}, please add it in option --token or download model weights locally".format(args.model))
+            sys.exit(f"{e}")
 
     parent_path = Path(__file__).parent.absolute()
     config = AutoConfig.from_pretrained(dir_model)
@@ -166,8 +178,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
     convert_cmd = ["python", path]
     convert_cmd.extend(["--outfile", Path(work_path, "ne_{}_f32.bin".format(model_type))])
     convert_cmd.extend(["--outtype", "f32"])
-    convert_cmd.append(args.model)
-    print("convert model ...")
+    convert_cmd.append(dir_model)
+    print("Convert model ...")
     subprocess.run(convert_cmd)
 
     # 2. quantize
@@ -186,7 +198,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     quant_cmd.extend(["--use_ggml"])
     quant_cmd.extend(["--build_dir", args.build_dir])
     quant_cmd.extend(["--one_click_run", "True"])
-    print("quantize model ...")
+    print("Quantize model ...")
     subprocess.run(quant_cmd)
 
     # 3. inference
@@ -208,7 +220,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         infer_cmd.extend(["--shift-roped-k"])
     if (model_type == "baichuan" or model_type == "qwen"):
         infer_cmd.extend(["--tokenizer", dir_model])
-    print("inferce model ...")
+    print("Inference model ...")
     subprocess.run(infer_cmd)


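With these changes a gated checkpoint can be fetched and run in one step. A hypothetical invocation of run.py's main(), assuming the model id is a positional argument as it is in convert.py; the repo id and token value are placeholders:

    # Resolves the Hub id to a local snapshot via --token, then converts,
    # quantizes, and runs inference on that local copy (dir_model).
    main(["--token", "hf_xxx", "meta-llama/Llama-2-7b-hf"])

Passing dir_model rather than args.model to the convert subprocess is what keeps the Hub id from being resolved a second time once the snapshot is already on disk.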
