ModelCloud · Qubitium · Nov 29, 2024 · Nov 29, 2024
diff --git a/tests/models/model_test.py b/tests/models/model_test.py
@@ -36,6 +36,9 @@ class ModelTest(unittest.TestCase):
     MODEL_MAX_LEN = 4096
     DELETE_QUANTIZED_MODEL = True
 
+    # quant config
+    DESC_ACT = True
+
     def generate(self, model, tokenizer, prompt=None):
         if prompt is None:
             prompt = "I am in Paris and"
@@ -83,6 +86,7 @@ def quantModel(self, model_id_or_path, trust_remote_code=False, torch_dtype="aut
             bits=4,
             group_size=128,
             format=FORMAT.GPTQ,
+            desc_act=self.DESC_ACT,
         )
         model = GPTQModel.load(
             model_id_or_path,

diff --git a/tests/models/test_hymba.py b/tests/models/test_hymba.py
@@ -4,14 +4,18 @@
 
 class TestHymba(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Hymba-1.5B-Instruct/"  # "baichuan-inc/Baichuan2-7B-Chat"
-    NATIVE_ARC_CHALLENGE_ACC = 0.2073
-    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2713
+    NATIVE_ARC_CHALLENGE_ACC = 0.3814
+    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3686
     MODEL_MAX_LEN = 8192
     TRUST_REMOTE_CODE = True
     APPLY_CHAT_TEMPLATE = True
     # Hymba currently only supports a batch size of 1.
     # See https://huggingface.co/nvidia/Hymba-1.5B-Instruct
     BATCH_SIZE = 1
 
+    # Hymba is quantized with DESC_ACT=False because it gives better results.
+    # If DESC_ACT=True, the output will be terrible.
+    DESC_ACT = False
+
     def test_hymba(self):
         self.quant_lm_eval()