From 357aaf067c3a169986ba602531395baa6cc97b4f Mon Sep 17 00:00:00 2001
From: CSY
Date: Wed, 4 Dec 2024 20:19:59 +0800
Subject: [PATCH 1/5] update deci delta

---
 tests/models/test_deci.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/test_deci.py b/tests/models/test_deci.py
index 7ac61262f..cfce2037b 100644
--- a/tests/models/test_deci.py
+++ b/tests/models/test_deci.py
@@ -5,7 +5,7 @@ class TestDeci(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/DeciLM-7B-instruct"  # "Deci/DeciLM-7B-instruct"
     NATIVE_ARC_CHALLENGE_ACC = 0.5239
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5222
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.55
+    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.8
     TRUST_REMOTE_CODE = True
     USE_VLLM = False
     BATCH_SIZE = 6

From eda8706fcc01f94168a554599c2592cd93d40283 Mon Sep 17 00:00:00 2001
From: CSY
Date: Wed, 4 Dec 2024 20:20:36 +0800
Subject: [PATCH 2/5] update cohere delta

---
 tests/models/test_cohere.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/test_cohere.py b/tests/models/test_cohere.py
index d11fc485d..88cb438d4 100644
--- a/tests/models/test_cohere.py
+++ b/tests/models/test_cohere.py
@@ -5,7 +5,7 @@ class TestCohere(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/aya-expanse-8b"  # "CohereForAI/aya-expanse-8b"
     NATIVE_ARC_CHALLENGE_ACC = 0.5401
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5640
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.12
+    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.15
     BATCH_SIZE = 4
 
     def test_cohere(self):

From f5b26f8d16de6a7b8312567c1b725122a023afaa Mon Sep 17 00:00:00 2001
From: CSY
Date: Wed, 4 Dec 2024 20:21:17 +0800
Subject: [PATCH 3/5] update longllama delta

---
 tests/models/test_longllama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/test_longllama.py b/tests/models/test_longllama.py
index 38128c2ce..c54ee051f 100644
--- a/tests/models/test_longllama.py
+++ b/tests/models/test_longllama.py
@@ -6,7 +6,7 @@ class TestLongLlama(ModelTest):
     NATIVE_ARC_CHALLENGE_ACC = 0.3515
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3652
     TRUST_REMOTE_CODE = True
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.4
+    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.5
     USE_VLLM = False
 
     def test_longllama(self):

From 02a36dd6c7397df70a1d4fa17713484a159206b9 Mon Sep 17 00:00:00 2001
From: CSY
Date: Wed, 4 Dec 2024 20:22:11 +0800
Subject: [PATCH 4/5] update hymba delta

---
 tests/models/test_hymba.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/models/test_hymba.py b/tests/models/test_hymba.py
index 5596452c0..271dd6760 100644
--- a/tests/models/test_hymba.py
+++ b/tests/models/test_hymba.py
@@ -5,6 +5,7 @@ class TestHymba(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Hymba-1.5B-Instruct/"  # "baichuan-inc/Baichuan2-7B-Chat"
     NATIVE_ARC_CHALLENGE_ACC = 0.2073
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2713
+    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.75
     MODEL_MAX_LEN = 8192
     TRUST_REMOTE_CODE = True
     APPLY_CHAT_TEMPLATE = True

From 87f921522622e228d1e736a98551d86d2620bfde Mon Sep 17 00:00:00 2001
From: CSY
Date: Wed, 4 Dec 2024 20:50:26 +0800
Subject: [PATCH 5/5] rename

---
 tests/models/model_test.py     | 4 ++--
 tests/models/test_cohere.py    | 2 +-
 tests/models/test_deci.py      | 2 +-
 tests/models/test_falcon.py    | 3 ++-
 tests/models/test_hymba.py     | 2 +-
 tests/models/test_llama3_2.py  | 2 +-
 tests/models/test_longllama.py | 2 +-
 tests/models/test_qwen2_5.py   | 2 +-
 tests/test_asym_gptq_v1.py     | 2 +-
 9 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/tests/models/model_test.py b/tests/models/model_test.py
index 9a0c74990..8c620a3c4 100644
--- a/tests/models/model_test.py
+++ b/tests/models/model_test.py
@@ -22,7 +22,7 @@ class ModelTest(unittest.TestCase):
     TASK_NAME = "arc_challenge"
 
     # sub test can modify
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.15  # -15%
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.15  # -15%
     QUANT_ARC_MAX_POSITIVE_DELTA = 0.2  # 20%
     TRUST_REMOTE_CODE = False
     APPLY_CHAT_TEMPLATE = False
@@ -221,7 +221,7 @@ def quant_lm_eval(self):
     def check_results(self, task_results):
         for filter, value in task_results.items():
             diff_pct = self.calculatorPer(filter=filter, value=value)
-            negative_pct = 100 * (1 - self.QUANT_ARC_MAX_NEGATIVE_DELTA)
+            negative_pct = 100 * (1 - self.QUANT_ARC_MAX_DELTA_FLOOR_PERCENT)
             positive_pct = 100 * (1 + self.QUANT_ARC_MAX_POSITIVE_DELTA)
 
             self.assertTrue(negative_pct <= diff_pct <= positive_pct, f"{filter}: {value} diff {diff_pct:.2f}% is out of the expected range [{negative_pct}-{positive_pct}%]")

diff --git a/tests/models/test_cohere.py b/tests/models/test_cohere.py
index 88cb438d4..8ca23dba9 100644
--- a/tests/models/test_cohere.py
+++ b/tests/models/test_cohere.py
@@ -5,7 +5,7 @@ class TestCohere(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/aya-expanse-8b"  # "CohereForAI/aya-expanse-8b"
     NATIVE_ARC_CHALLENGE_ACC = 0.5401
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5640
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.15
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.15
     BATCH_SIZE = 4
 
     def test_cohere(self):

diff --git a/tests/models/test_deci.py b/tests/models/test_deci.py
index cfce2037b..ade5e169f 100644
--- a/tests/models/test_deci.py
+++ b/tests/models/test_deci.py
@@ -5,7 +5,7 @@ class TestDeci(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/DeciLM-7B-instruct"  # "Deci/DeciLM-7B-instruct"
     NATIVE_ARC_CHALLENGE_ACC = 0.5239
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5222
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.8
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.8
     TRUST_REMOTE_CODE = True
     USE_VLLM = False
     BATCH_SIZE = 6

diff --git a/tests/models/test_falcon.py b/tests/models/test_falcon.py
index 091d2c42a..467be5977 100644
--- a/tests/models/test_falcon.py
+++ b/tests/models/test_falcon.py
@@ -1,4 +1,5 @@
 import torch  # noqa: E402from tests.model_test import ModelTest
+
 from model_test import ModelTest
 
 
@@ -9,7 +10,7 @@ class TestFalcon(ModelTest):
     APPLY_CHAT_TEMPLATE = True
     TRUST_REMOTE_CODE = True
     TORCH_DTYPE = torch.float16
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.52
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.52
     BATCH_SIZE = 6
     USE_VLLM = False
 

diff --git a/tests/models/test_hymba.py b/tests/models/test_hymba.py
index 271dd6760..33b466f4a 100644
--- a/tests/models/test_hymba.py
+++ b/tests/models/test_hymba.py
@@ -5,7 +5,7 @@ class TestHymba(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Hymba-1.5B-Instruct/"  # "baichuan-inc/Baichuan2-7B-Chat"
     NATIVE_ARC_CHALLENGE_ACC = 0.2073
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2713
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.75
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.75
     MODEL_MAX_LEN = 8192
     TRUST_REMOTE_CODE = True
     APPLY_CHAT_TEMPLATE = True

diff --git a/tests/models/test_llama3_2.py b/tests/models/test_llama3_2.py
index d94e5f70d..b301d9f18 100644
--- a/tests/models/test_llama3_2.py
+++ b/tests/models/test_llama3_2.py
@@ -5,7 +5,7 @@ class TestLlama3_2(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct"  # "meta-llama/Llama-3.2-1B-Instruct"
     NATIVE_ARC_CHALLENGE_ACC = 0.3567
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3805
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.36
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.36
     APPLY_CHAT_TEMPLATE = True
     TRUST_REMOTE_CODE = True
 

diff --git a/tests/models/test_longllama.py b/tests/models/test_longllama.py
index c54ee051f..7407753c1 100644
--- a/tests/models/test_longllama.py
+++ b/tests/models/test_longllama.py
@@ -6,7 +6,7 @@ class TestLongLlama(ModelTest):
     NATIVE_ARC_CHALLENGE_ACC = 0.3515
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3652
     TRUST_REMOTE_CODE = True
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.5
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.5
     USE_VLLM = False
 
     def test_longllama(self):

diff --git a/tests/models/test_qwen2_5.py b/tests/models/test_qwen2_5.py
index 5f92ae5c2..a15fdc470 100644
--- a/tests/models/test_qwen2_5.py
+++ b/tests/models/test_qwen2_5.py
@@ -3,7 +3,7 @@
 
 class TestQwen2_5(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Qwen2.5-0.5B-Instruct"
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.2
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
     NATIVE_ARC_CHALLENGE_ACC = 0.2739
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3055
     TRUST_REMOTE_CODE = False

diff --git a/tests/test_asym_gptq_v1.py b/tests/test_asym_gptq_v1.py
index 6eb023a8a..237792604 100644
--- a/tests/test_asym_gptq_v1.py
+++ b/tests/test_asym_gptq_v1.py
@@ -12,7 +12,7 @@ class Test(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct"  # "meta-llama/Llama-3.2-1B-Instruct"
     NATIVE_ARC_CHALLENGE_ACC = 0.3567
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3805
-    QUANT_ARC_MAX_NEGATIVE_DELTA = 0.36
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.36
     QUANT_FORMAT = FORMAT.GPTQ
     SYM = False
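
For reference, a minimal sketch of the bound that [PATCH 5/5] renames, as applied in ModelTest.check_results. The accuracy values below are hypothetical, and calculatorPer is assumed to return the quantized score as a percentage of the native score; only the two class constants and the range check mirror the diff above.

    # Hedged sketch of the check_results bound; the accuracy values are invented for illustration.
    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.36  # e.g. the Llama-3.2 setting in the diff
    QUANT_ARC_MAX_POSITIVE_DELTA = 0.2

    native_acc = 0.3567  # hypothetical native arc_challenge acc
    quant_acc = 0.3100   # hypothetical post-quantization acc

    diff_pct = 100 * quant_acc / native_acc                       # ~86.9%, assuming calculatorPer computes quant/native
    negative_pct = 100 * (1 - QUANT_ARC_MAX_DELTA_FLOOR_PERCENT)  # 64.0% floor
    positive_pct = 100 * (1 + QUANT_ARC_MAX_POSITIVE_DELTA)       # 120.0% ceiling

    assert negative_pct <= diff_pct <= positive_pct, (
        f"acc: {quant_acc} diff {diff_pct:.2f}% is out of the expected range [{negative_pct}-{positive_pct}%]"
    )

Under this reading, a larger QUANT_ARC_MAX_DELTA_FLOOR_PERCENT lowers the floor and so tolerates a bigger accuracy drop after quantization (the per-model loosening done in patches 1-4 under the old name), while QUANT_ARC_MAX_POSITIVE_DELTA caps unexpectedly large gains.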