diff --git a/tests/models/model_test.py b/tests/models/model_test.py index b3eb73ba4..40087fc5a 100644 --- a/tests/models/model_test.py +++ b/tests/models/model_test.py @@ -36,6 +36,9 @@ class ModelTest(unittest.TestCase): MODEL_MAX_LEN = 4096 DELETE_QUANTIZED_MODEL = True + # quant config + DESC_ACT = True + def generate(self, model, tokenizer, prompt=None): if prompt is None: prompt = "I am in Paris and" @@ -83,6 +86,7 @@ def quantModel(self, model_id_or_path, trust_remote_code=False, torch_dtype="aut bits=4, group_size=128, format=FORMAT.GPTQ, + desc_act=self.DESC_ACT, ) model = GPTQModel.load( model_id_or_path, diff --git a/tests/models/test_hymba.py b/tests/models/test_hymba.py index cfe8ee643..fadb3ca82 100644 --- a/tests/models/test_hymba.py +++ b/tests/models/test_hymba.py @@ -4,8 +4,8 @@ class TestHymba(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/Hymba-1.5B-Instruct/" # "baichuan-inc/Baichuan2-7B-Chat" - NATIVE_ARC_CHALLENGE_ACC = 0.2073 - NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2713 + NATIVE_ARC_CHALLENGE_ACC = 0.3814 + NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3686 MODEL_MAX_LEN = 8192 TRUST_REMOTE_CODE = True APPLY_CHAT_TEMPLATE = True @@ -13,5 +13,9 @@ class TestHymba(ModelTest): # See https://huggingface.co/nvidia/Hymba-1.5B-Instruct BATCH_SIZE = 1 + # Hymba currently requires DESC_ACT=False to get better results. + # If DESC_ACT=True, the output will be terrible. + DESC_ACT = False + def test_hymba(self): self.quant_lm_eval()