use lm_eval (#704)
ZX-ModelCloud authored Nov 29, 2024
1 parent 6a98dbc commit b3a5fc3
Showing 1 changed file with 5 additions and 21 deletions.
26 changes: 5 additions & 21 deletions tests/models/test_hymba.py
@@ -1,33 +1,17 @@
 from gptqmodel import GPTQModel
 from model_test import ModelTest
 
 
 class TestHymba(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Hymba-1.5B-Instruct/"  # "baichuan-inc/Baichuan2-7B-Chat"
-    NATIVE_ARC_CHALLENGE_ACC = 0.4104
-    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.4317
+    NATIVE_ARC_CHALLENGE_ACC = 0.2073
+    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2713
     MODEL_MAX_LEN = 8192
     TRUST_REMOTE_CODE = True
     APPLY_CHAT_TEMPLATE = True
     # Hymba currently only supports a batch size of 1.
     # See https://huggingface.co/nvidia/Hymba-1.5B-Instruct
     BATCH_SIZE = 1
 
     def test_hymba(self):
-        model, tokenizer = self.quantModel(self.NATIVE_MODEL_ID, trust_remote_code=self.TRUST_REMOTE_CODE,
-                                           torch_dtype=self.TORCH_DTYPE)
-        model.cuda()
-
-        prompt = "5+5=?"
-
-        messages = [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}]
-
-        # Apply chat template
-        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True,
-                                                       return_tensors="pt").to('cuda')
-
-        outputs = model.generate(input_ids=tokenized_chat, max_new_tokens=56)
-        input_length = tokenized_chat.shape[1]
-        response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
-
-        print(f"Model response: {response}")
-
-        self.assertTrue("10" in response)
+        self.quant_lm_eval()
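
For context on what the new one-liner replaces: `quant_lm_eval()` lives in the shared `ModelTest` base class and its implementation is not shown in this diff. Below is a minimal sketch of what an lm-evaluation-harness check of this kind could look like, assuming the v0.4+ `lm_eval.simple_evaluate` API; the tolerance value, model path, and direct use of the base checkpoint are illustrative assumptions, not GPTQModel's actual code.

```python
# Hypothetical sketch approximating an lm_eval-based ARC-Challenge check.
# Assumes lm-evaluation-harness >= 0.4 (pip install lm_eval); in the real
# test the quantized model would be evaluated, not the raw checkpoint.
import lm_eval

EXPECTED_ACC = 0.2073       # NATIVE_ARC_CHALLENGE_ACC from the test above
EXPECTED_ACC_NORM = 0.2713  # NATIVE_ARC_CHALLENGE_ACC_NORM
TOLERANCE = 0.05            # assumed acceptable drift for the quantized model

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=/monster/data/model/Hymba-1.5B-Instruct/,trust_remote_code=True",
    tasks=["arc_challenge"],
    batch_size=1,  # Hymba currently only supports a batch size of 1
)

metrics = results["results"]["arc_challenge"]
assert abs(metrics["acc,none"] - EXPECTED_ACC) <= TOLERANCE
assert abs(metrics["acc_norm,none"] - EXPECTED_ACC_NORM) <= TOLERANCE
```

Running the model through the harness instead of a hand-rolled prompt makes the regression signal reproducible: the recorded `NATIVE_ARC_CHALLENGE_ACC*` baselines give fixed numeric targets rather than a string match on "10" in a single generation.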
