refactor save_split_checkpoint script and use no-archive format
Naman Nandan committed Jul 27, 2023
1 parent e9a28d5 commit 71e1c2e
Showing 4 changed files with 21 additions and 59 deletions.
10 changes: 5 additions & 5 deletions examples/large_models/inferentia2/llama/Readme.md
@@ -40,34 +40,34 @@ pip install git+https://github.com/aws-neuron/transformers-neuronx.git transform

### Step 2: Save the model split checkpoints compatible with `transformers-neuronx`

- Navigate up to `large_model/inferentia2/llama` directory.
+ Navigate to `large_model/inferentia2/llama` directory.

```bash
- python save_split_checkpoints.py --model_name decapoda-research/llama-7b-hf --save_path './decapoda_llama_7b_split'
+ python ../util/inf2_save_split_checkpoints.py --model_name decapoda-research/llama-7b-hf --save_path './decapoda_llama_7b_split'

```


### Step 3: Generate Tar/MAR file

```bash
- torch-model-archiver --model-name decapoda_llama_7b --version 1.0 --handler inf2_handler.py --extra-files ./decapoda_llama_7b_split -r requirements.txt --config-file model-config.yaml --archive-format tgz
+ torch-model-archiver --model-name decapoda_llama_7b --version 1.0 --handler inf2_handler.py --extra-files ./decapoda_llama_7b_split -r requirements.txt --config-file model-config.yaml --archive-format no-archive

```
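For clarity (not part of this commit): with `--archive-format no-archive`, `torch-model-archiver` writes an unpacked directory named after the model instead of a `.mar`/`.tar.gz`, which is why the following steps move and register a plain directory. A quick sanity check:

```bash
# Illustrative check: the no-archive output is a directory, not a tarball,
# with the archiver manifest at MAR-INF/MANIFEST.json alongside the extra files.
ls decapoda_llama_7b/MAR-INF/MANIFEST.json
```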

### Step 4: Add the mar file to model store

```bash
mkdir model_store
- mv decapoda_llama_7b.tar.gz model_store
+ mv decapoda_llama_7b model_store
```

### Step 5: Start torchserve

Update config.properties and start torchserve

```bash
- torchserve --ncs --start --model-store model_store --models decapoda_llama_7b.tar.gz
+ torchserve --ncs --start --model-store model_store --models decapoda_llama_7b
```
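Since this step only says to update `config.properties`, here is a minimal sketch of one. The keys are standard TorchServe settings, but the values are illustrative and not taken from this commit:

```bash
# Minimal illustrative config.properties -- adjust addresses/values as needed
cat > config.properties <<'EOF'
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
model_store=model_store
# install per-model Python dependencies (requirements.txt) at load time
install_py_dep_per_model=true
EOF
```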

### Step 6: Run inference
45 changes: 0 additions & 45 deletions examples/large_models/inferentia2/llama/save_split_checkpoints.py

This file was deleted.

12 changes: 6 additions & 6 deletions examples/large_models/inferentia2/opt/Readme.md
@@ -40,34 +40,34 @@ pip install git+https://github.com/aws-neuron/transformers-neuronx.git transform

### Step 2: Save the model split checkpoints compatible with `transformers-neuronx`

+ Navigate to `large_model/inferentia2/opt` directory.

```bash
- python save_split_checkpoints.py --model_name facebook/opt-6.7b --save_path './opt-6.7b-split'
+ python ../util/inf2_save_split_checkpoints.py --model_name facebook/opt-6.7b --save_path './opt-6.7b-split'

```


### Step 3: Generate Tar/MAR file

- Navigate up to `large_model/inferentia2/opt` directory.

```bash
- torch-model-archiver --model-name opt --version 1.0 --handler inf2_handler.py --extra-files ./opt-6.7b-split -r requirements.txt --config-file model-config.yaml --archive-format tgz
+ torch-model-archiver --model-name opt --version 1.0 --handler inf2_handler.py --extra-files ./opt-6.7b-split -r requirements.txt --config-file model-config.yaml --archive-format no-archive

```

### Step 4: Add the mar file to model store

```bash
mkdir model_store
- mv opt.tar.gz model_store
+ mv opt model_store
```

### Step 5: Start torchserve

Update config.properties and start torchserve

```bash
- torchserve --ncs --start --model-store model_store --models opt.tar.gz
+ torchserve --ncs --start --model-store model_store --models opt
```

### Step 6: Run inference
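The body of this step is truncated in the diff view; for orientation, a typical request against TorchServe's standard predictions endpoint would look like the following (the payload file name is hypothetical):

```bash
# Illustrative request; POST /predictions/<model_name> is the standard
# TorchServe inference endpoint. prompt.txt is a hypothetical payload file.
curl -X POST http://localhost:8080/predictions/opt -T prompt.txt
```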
13 changes: 10 additions & 3 deletions examples/large_models/inferentia2/util/inf2_save_split_checkpoints.py
@@ -2,9 +2,12 @@
import os

import torch
+ from transformers import AutoConfig, AutoModelForCausalLM
from transformers.models.opt import OPTForCausalLM
from transformers_neuronx.module import save_pretrained_split

+ os.environ["NEURON_CC_FLAGS"] = "--model-type=transformer-inference"


def create_directory_if_not_exists(path_str: str) -> str:
"""Creates a directory if it doesn't exist, and returns the directory path."""
@@ -17,7 +20,7 @@ def create_directory_if_not_exists(path_str: str) -> str:
raise NotADirectoryError(path_str)


- def amp_callback(model: OPTForCausalLM, dtype: torch.dtype) -> None:
+ def opt_amp_callback(model: OPTForCausalLM, dtype: torch.dtype) -> None:
"""Casts attention and MLP to low precision only; layernorms stay as f32."""
for block in model.model.decoder.layers:
block.self_attn.to(dtype)
@@ -41,11 +44,15 @@ def amp_callback(model: OPTForCausalLM, dtype: torch.dtype) -> None:

save_path = create_directory_if_not_exists(args.save_path)

+ # Load HuggingFace model config
+ hf_model_config = AutoConfig.from_pretrained(args.model_name)

# Load HuggingFace model
- hf_model = OPTForCausalLM.from_pretrained(args.model_name, low_cpu_mem_usage=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(args.model_name, low_cpu_mem_usage=True)

# Apply Automatic Mixed Precision (AMP)
- amp_callback(hf_model, torch.float16)
+ if hf_model_config.model_type == "opt":
+     opt_amp_callback(hf_model, torch.float16)

# Save the model
save_pretrained_split(hf_model, args.save_path)
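For context on how these split checkpoints are consumed downstream, a rough sketch of the load path using the `transformers-neuronx` OPT sampling API. The argument values such as `tp_degree` and `batch_size` are illustrative, and in these examples the load normally happens inside `inf2_handler.py`:

```python
# Sketch: loading a split checkpoint saved above for inference on Inferentia2.
# Argument values are illustrative, not taken from this commit.
from transformers_neuronx.opt.model import OPTForSampling

model = OPTForSampling.from_pretrained(
    "./opt-6.7b-split",  # directory produced by save_pretrained_split
    batch_size=1,
    tp_degree=2,         # number of NeuronCores to shard the model across
    amp="f16",
)
model.to_neuron()        # compile and load the weights onto the NeuronCores
```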
