This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

[Model Enabling] llama3-8b-instruct-chat Enabling #225

Merged · 10 commits · Apr 19, 2024
2 changes: 1 addition & 1 deletion neural_speed/application/quant_model.cpp
@@ -53,7 +53,7 @@ int main(int argc, char** argv) {
   const std::string fname_inp = q_params.model_file;
   const std::string fname_out = q_params.out_file;
   ne_ftype ftype = quant_params_to_ftype(q_params);
-  printf("ne_ftype: %d\n", ftype);
+  printf("%s: quant_params_to_ftype: %d\n", __func__, ftype);
   const int nthread = q_params.nthread;
 
   const int64_t t_main_start_us = ne_time_us();
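As an aside, and not part of this PR: the change above simply tags the quantization log line with the enclosing function's name via C's `__func__`. A minimal Python sketch of the same diagnostic pattern, using the stdlib logging module's funcName field; the function name `quantize_model` here is hypothetical:

```python
# Illustration only: mirrors the C++ change, which prefixes the ftype log
# line with the enclosing function's name via __func__.
import logging

logging.basicConfig(format="%(funcName)s: %(message)s", level=logging.INFO)

def quantize_model(ftype: int) -> None:
    # Comparable to: printf("%s: quant_params_to_ftype: %d\n", __func__, ftype);
    logging.info("quant_params_to_ftype: %d", ftype)

quantize_model(2)  # logs "quantize_model: quant_params_to_ftype: 2"
```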
16 changes: 15 additions & 1 deletion neural_speed/convert/__init__.py
@@ -28,6 +28,8 @@
     "phi-msft": "phi"
 }
 
+llama3_vocab_size = 128256
+
 
 def convert_model(model, outfile, outtype="f32", format="NE", model_hub="huggingface", use_quantized_model=False):
     if model_hub == "modelscope":
@@ -37,11 +39,23 @@ def convert_model(model, outfile, outtype="f32", format="NE", model_hub="hugging
     config = AutoConfig.from_pretrained(model, trust_remote_code=True)
     model_type = model_maps.get(config.model_type, config.model_type)
 
+    cmd = []
     if use_quantized_model:
         path = Path(Path(__file__).parent.absolute(), "convert_quantized_{}.py".format(model_type))
     else:
         path = Path(Path(__file__).parent.absolute(), "convert_{}.py".format(model_type))
-    cmd = []
+
+    if config.vocab_size == llama3_vocab_size:
+        path = Path(Path(__file__).parent.absolute(), "convert_llama3.py")
+        cmd.extend(["python", path])
+        cmd.extend(["--outfile", outfile])
+        cmd.extend(["--outtype", outtype])
+        cmd.extend([model])
+        cmd.extend(["--vocab-type", "bpe"])
+        print("cmd:", cmd)
+        subprocess.run(cmd)
+        return
+
     cmd.extend(["python", path])
     cmd.extend(["--outfile", outfile])
     cmd.extend(["--outtype", outtype])
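Taken together, the Python change routes any checkpoint whose tokenizer vocabulary has 128256 entries (the Llama 3 size, read from the HuggingFace AutoConfig) to a dedicated BPE-based converter before the generic per-model dispatch runs. A self-contained sketch of that routing; the helper name `build_convert_cmd` and the model/output names are hypothetical, and the real entry point is `convert_model()`, which launches the resulting command with `subprocess.run(cmd)`:

```python
# Standalone sketch of the dispatch added above. Assumes the converter
# scripts live next to this file, as in neural_speed/convert/__init__.py.
from pathlib import Path

llama3_vocab_size = 128256  # vocabulary size that identifies Llama 3 checkpoints

def build_convert_cmd(vocab_size: int, model_type: str, model: str,
                      outfile: str, outtype: str = "f32") -> list:
    script_dir = Path(__file__).parent.absolute()
    if vocab_size == llama3_vocab_size:
        # Llama 3 ships a BPE tokenizer, so hand off to the dedicated script.
        return ["python", str(script_dir / "convert_llama3.py"),
                "--outfile", outfile, "--outtype", outtype, model,
                "--vocab-type", "bpe"]
    # Otherwise fall through to the generic per-model converter.
    return ["python", str(script_dir / "convert_{}.py".format(model_type)),
            "--outfile", outfile, "--outtype", outtype, model]

# Hypothetical usage:
print(build_convert_cmd(128256, "llama",
                        "meta-llama/Meta-Llama-3-8B-Instruct",
                        "ne-llama3-f32.bin"))
```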