Skip to content

Commit

Permalink
tweak W8A8
Browse files — browse the repository at this point in the history
robertgshaw2-neuralmagic committed Jun 25, 2024
1 parent 62f8011 commit af0be23
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions examples/quantization/example-w8a8-int8.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
from transformers import AutoTokenizer

# Select model and load it.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
model = SparseAutoModelForCausalLM.from_pretrained(
MODEL_ID, device_map="auto", torch_dtype='auto',
)
@@ -77,6 +77,6 @@ def preprocess(example):
print("==========================================\n\n")

# Save to disk compressed.
SAVE_DIR = MODEL_ID.split("/")[1] + "-W8A8-DYNAMIC-PER-TOKEN"
SAVE_DIR = MODEL_ID.split("/")[1] + "-W8A8-Dynamic-Per-Token"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)

0 comments on commit af0be23

Please sign in to comment.