From 66bc69ec0cb0b9997cd5039e3c7d897a8ba20aa8 Mon Sep 17 00:00:00 2001
From: tracelogfb <48808670+tracelogfb@users.noreply.github.com>
Date: Fri, 9 May 2025 19:46:54 -0700
Subject: [PATCH 1/8] fix broken test vllm:test_kernels - test_attention_selector.py::test_flash_attn (#17873)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Stephen Chen
Signed-off-by: 汪志鹏
---
 tests/kernels/attention/test_attention_selector.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 436cb430817e..58da01f0ebbf 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -188,8 +188,9 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
         m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL)
 
         # Unsupported CUDA arch
-        monkeypatch.setattr(torch.cuda, "get_device_capability", lambda:
-                            (7, 5))
+        monkeypatch.setattr(torch.cuda,
+                            "get_device_capability",
+                            lambda _=None: (7, 5))
 
         backend = get_attn_backend(16, torch.float16, None, 16, False)
         assert backend.get_name() != STR_FLASH_ATTN_VAL

From 282a1af76cc49eb2d835243b77dd6c91147ba3e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Sat, 10 May 2025 12:12:17 +0800
Subject: [PATCH 2/8] Update config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 51 +++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f6c2b35535b6..2fbd996dbb0b 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -686,9 +686,54 @@ def recurse_elems(elem: Any):
     config_dict["hidden_act"] = config_dict.get("activation", "silu")
     config_dict["tie_word_embeddings"] = config_dict.get(
         "tie_embeddings", False)
-    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
-    config_dict["max_position_embeddings"] = config_dict.get(
-        "max_position_embeddings", 128_000)
+    # Check if max_position_embeddings is in params.json
+    mpe_from_params = config_dict.get("max_position_embeddings")
+    final_mpe_to_set = mpe_from_params
+
+    if final_mpe_to_set is None:
+        # Not found in params.json, try to get from standard HF AutoConfig
+        hf_config_for_defaults = None
+        try:
+            trust_remote_code_val = kwargs.get("trust_remote_code", False)
+            token_val = kwargs.get("token")  # Passed from get_config
+
+            hf_config_for_defaults = AutoConfig.from_pretrained(
+                model,
+                revision=revision,
+                trust_remote_code=trust_remote_code_val,
+                token=token_val)
+        except Exception as e:
+            error_message = (
+                "Invalid repository ID or local directory specified:"
+                " '{model}'.\nPlease verify the following requirements:\n"
+                "1. Provide a valid Hugging Face repository ID.\n"
+                "2. Specify a local directory that contains a recognized "
+                "configuration file.\n").format(model=model)
+
+            raise ValueError(error_message) from e
+
+        if hf_config_for_defaults:
+            # Try to get from text_config first, then top-level
+            mpe_from_hf_config = None
+            text_config_obj = getattr(hf_config_for_defaults, "text_config",
+                                      None)
+            if text_config_obj and hasattr(text_config_obj,
+                                           "max_position_embeddings"):
+                mpe_from_hf_config = getattr(text_config_obj,
+                                             "max_position_embeddings", None)
+
+            if mpe_from_hf_config is None and hasattr(
+                    hf_config_for_defaults, "max_position_embeddings"):
+                mpe_from_hf_config = getattr(hf_config_for_defaults,
+                                             "max_position_embeddings", None)
+
+            if mpe_from_hf_config is not None:
+                final_mpe_to_set = mpe_from_hf_config
+
+    if final_mpe_to_set is None:  # Still not found, use ultimate fallback
+        final_mpe_to_set = 128_000
+
+    config_dict["max_position_embeddings"] = final_mpe_to_set
 
     if config_dict.get("quantization") is not None:
         quantization = config_dict.get("quantization", {})

From 7c2e2d9ab51e1a34ca89e24fac10c2a73d320e88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Tue, 13 May 2025 11:45:11 +0800
Subject: [PATCH 3/8] Revert "Update config.py"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 14c9116eb24cd25aa44369cedaeba8d2521f8916.

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 51 ++-----------------------------
 1 file changed, 3 insertions(+), 48 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 2fbd996dbb0b..f6c2b35535b6 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -686,54 +686,9 @@ def recurse_elems(elem: Any):
     config_dict["hidden_act"] = config_dict.get("activation", "silu")
     config_dict["tie_word_embeddings"] = config_dict.get(
         "tie_embeddings", False)
-    # Check if max_position_embeddings is in params.json
-    mpe_from_params = config_dict.get("max_position_embeddings")
-    final_mpe_to_set = mpe_from_params
-
-    if final_mpe_to_set is None:
-        # Not found in params.json, try to get from standard HF AutoConfig
-        hf_config_for_defaults = None
-        try:
-            trust_remote_code_val = kwargs.get("trust_remote_code", False)
-            token_val = kwargs.get("token")  # Passed from get_config
-
-            hf_config_for_defaults = AutoConfig.from_pretrained(
-                model,
-                revision=revision,
-                trust_remote_code=trust_remote_code_val,
-                token=token_val)
-        except Exception as e:
-            error_message = (
-                "Invalid repository ID or local directory specified:"
-                " '{model}'.\nPlease verify the following requirements:\n"
-                "1. Provide a valid Hugging Face repository ID.\n"
-                "2. Specify a local directory that contains a recognized "
-                "configuration file.\n").format(model=model)
-
-            raise ValueError(error_message) from e
-
-        if hf_config_for_defaults:
-            # Try to get from text_config first, then top-level
-            mpe_from_hf_config = None
-            text_config_obj = getattr(hf_config_for_defaults, "text_config",
-                                      None)
-            if text_config_obj and hasattr(text_config_obj,
-                                           "max_position_embeddings"):
-                mpe_from_hf_config = getattr(text_config_obj,
-                                             "max_position_embeddings", None)
-
-            if mpe_from_hf_config is None and hasattr(
-                    hf_config_for_defaults, "max_position_embeddings"):
-                mpe_from_hf_config = getattr(hf_config_for_defaults,
-                                             "max_position_embeddings", None)
-
-            if mpe_from_hf_config is not None:
-                final_mpe_to_set = mpe_from_hf_config
-
-    if final_mpe_to_set is None:  # Still not found, use ultimate fallback
-        final_mpe_to_set = 128_000
-
-    config_dict["max_position_embeddings"] = final_mpe_to_set
+    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
+    config_dict["max_position_embeddings"] = config_dict.get(
+        "max_position_embeddings", 128_000)
 
     if config_dict.get("quantization") is not None:
         quantization = config_dict.get("quantization", {})

From 239a8530f8021e7cc63bb10982f391ff7c879598 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Tue, 13 May 2025 11:47:01 +0800
Subject: [PATCH 4/8] Revert "fix broken test vllm:test_kernels - test_attention_selector.py::test_flash_attn (#17873)"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit bed409e0ce8ec3f2fec70d1cd9ffb029d80b16f4.

Signed-off-by: 汪志鹏
---
 tests/kernels/attention/test_attention_selector.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 58da01f0ebbf..436cb430817e 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -188,9 +188,8 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
         m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL)
 
         # Unsupported CUDA arch
-        monkeypatch.setattr(torch.cuda,
-                            "get_device_capability",
-                            lambda _=None: (7, 5))
+        monkeypatch.setattr(torch.cuda, "get_device_capability", lambda:
+                            (7, 5))
 
         backend = get_attn_backend(16, torch.float16, None, 16, False)
         assert backend.get_name() != STR_FLASH_ATTN_VAL

From d07f67c91a606e7d2c884a81d63fe7f5776a373c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Tue, 13 May 2025 11:54:49 +0800
Subject: [PATCH 5/8] Update config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f6c2b35535b6..b630e4e92c54 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -686,9 +686,25 @@ def recurse_elems(elem: Any):
     config_dict["hidden_act"] = config_dict.get("activation", "silu")
     config_dict["tie_word_embeddings"] = config_dict.get(
         "tie_embeddings", False)
-    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
-    config_dict["max_position_embeddings"] = config_dict.get(
-        "max_position_embeddings", 128_000)
+
+    if config_dict.get("max_position_embeddings") is None:
+        max_position_embeddings = 128_000
+        try:
+            trust_remote_code_val = kwargs.get("trust_remote_code", False)
+            token_val = kwargs.get("token")
+            hf_config = AutoConfig.from_pretrained(
+                model,
+                revision=revision,
+                trust_remote_code=trust_remote_code_val,
+                token=token_val)
+            if hf_value := hf_config.get_text_config().max_position_embeddings:
+                max_position_embeddings = hf_value
+        except Exception:
+            warning_message = ("Could not read 'max_position_embeddings' "
+                               "from the config for model: "
+                               "'{model}'.\n").format(model=model)
+            logger.warning(warning_message)
+        config_dict["max_position_embeddings"] = max_position_embeddings
 
     if config_dict.get("quantization") is not None:
         quantization = config_dict.get("quantization", {})

From c901b70ef3bbda95cc9aed2921ef32d7e44685c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Wed, 14 May 2025 13:02:19 +0800
Subject: [PATCH 6/8] Update config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index b630e4e92c54..310003ce6941 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -699,11 +699,12 @@ def recurse_elems(elem: Any):
                 token=token_val)
             if hf_value := hf_config.get_text_config().max_position_embeddings:
                 max_position_embeddings = hf_value
-        except Exception:
-            warning_message = ("Could not read 'max_position_embeddings' "
-                               "from the config for model: "
-                               "'{model}'.\n").format(model=model)
-            logger.warning(warning_message)
+        except Exception as e:
+            logger.warning(
+                "The params.json file is missing 'max_position_embeddings'"
+                " and could not get a value from the HF config."
+ " Defaulting to 128000", + exc_info=e) config_dict["max_position_embeddings"] = max_position_embeddings if config_dict.get("quantization") is not None: From a5993fc9dbbbc9d915292abb956e72a51c6870d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?= Date: Thu, 15 May 2025 10:06:02 +0800 Subject: [PATCH 7/8] Update config.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪志鹏 --- vllm/transformers_utils/config.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 310003ce6941..215b5c858006 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -691,12 +691,11 @@ def recurse_elems(elem: Any): max_position_embeddings = 128_000 try: trust_remote_code_val = kwargs.get("trust_remote_code", False) - token_val = kwargs.get("token") - hf_config = AutoConfig.from_pretrained( - model, - revision=revision, + hf_config = get_config( + model=model, trust_remote_code=trust_remote_code_val, - token=token_val) + revision=revision, + config_format=ConfigFormat.HF) if hf_value := hf_config.get_text_config().max_position_embeddings: max_position_embeddings = hf_value except Exception as e: From 6acb3199268ac9d256703233373196dff75f5245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?= Date: Thu, 15 May 2025 10:24:02 +0800 Subject: [PATCH 8/8] Update config.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪志鹏 --- vllm/transformers_utils/config.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 215b5c858006..d7836c260587 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -691,11 +691,10 @@ def recurse_elems(elem: Any): max_position_embeddings = 128_000 try: trust_remote_code_val = kwargs.get("trust_remote_code", False) - hf_config = get_config( - model=model, - trust_remote_code=trust_remote_code_val, - revision=revision, - config_format=ConfigFormat.HF) + hf_config = get_config(model=model, + trust_remote_code=trust_remote_code_val, + revision=revision, + config_format=ConfigFormat.HF) if hf_value := hf_config.get_text_config().max_position_embeddings: max_position_embeddings = hf_value except Exception as e: