From 66bc69ec0cb0b9997cd5039e3c7d897a8ba20aa8 Mon Sep 17 00:00:00 2001
From: tracelogfb <48808670+tracelogfb@users.noreply.github.com>
Date: Fri, 9 May 2025 19:46:54 -0700
Subject: [PATCH 1/8] fix broken test vllm:test_kernels - test_attention_selector.py::test_flash_attn (#17873)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Stephen Chen
Signed-off-by: 汪志鹏
---
 tests/kernels/attention/test_attention_selector.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 436cb430817e..58da01f0ebbf 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -188,8 +188,9 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
         m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL)
 
         # Unsupported CUDA arch
-        monkeypatch.setattr(torch.cuda, "get_device_capability", lambda:
-                            (7, 5))
+        monkeypatch.setattr(torch.cuda,
+                            "get_device_capability",
+                            lambda _=None: (7, 5))
 
         backend = get_attn_backend(16, torch.float16, None, 16, False)
         assert backend.get_name() != STR_FLASH_ATTN_VAL

From 282a1af76cc49eb2d835243b77dd6c91147ba3e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Sat, 10 May 2025 12:12:17 +0800
Subject: [PATCH 2/8] Update config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 51 +++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f6c2b35535b6..2fbd996dbb0b 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -686,9 +686,54 @@ def recurse_elems(elem: Any):
     config_dict["hidden_act"] = config_dict.get("activation", "silu")
     config_dict["tie_word_embeddings"] = config_dict.get(
         "tie_embeddings", False)
-    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
-    config_dict["max_position_embeddings"] = config_dict.get(
-        "max_position_embeddings", 128_000)
+    # Check if max_position_embeddings is in params.json
+    mpe_from_params = config_dict.get("max_position_embeddings")
+    final_mpe_to_set = mpe_from_params
+
+    if final_mpe_to_set is None:
+        # Not found in params.json, try to get from standard HF AutoConfig
+        hf_config_for_defaults = None
+        try:
+            trust_remote_code_val = kwargs.get("trust_remote_code", False)
+            token_val = kwargs.get("token")  # Passed from get_config
+
+            hf_config_for_defaults = AutoConfig.from_pretrained(
+                model,
+                revision=revision,
+                trust_remote_code=trust_remote_code_val,
+                token=token_val)
+        except Exception as e:
+            error_message = (
+                "Invalid repository ID or local directory specified:"
+                " '{model}'.\nPlease verify the following requirements:\n"
+                "1. Provide a valid Hugging Face repository ID.\n"
+                "2. Specify a local directory that contains a recognized "
+                "configuration file.\n").format(model=model)
+
+            raise ValueError(error_message) from e
+
+        if hf_config_for_defaults:
+            # Try to get from text_config first, then top-level
+            mpe_from_hf_config = None
+            text_config_obj = getattr(hf_config_for_defaults, "text_config",
+                                      None)
+            if text_config_obj and hasattr(text_config_obj,
+                                           "max_position_embeddings"):
+                mpe_from_hf_config = getattr(text_config_obj,
+                                             "max_position_embeddings", None)
+
+            if mpe_from_hf_config is None and hasattr(
+                    hf_config_for_defaults, "max_position_embeddings"):
+                mpe_from_hf_config = getattr(hf_config_for_defaults,
+                                             "max_position_embeddings", None)
+
+            if mpe_from_hf_config is not None:
+                final_mpe_to_set = mpe_from_hf_config
+
+    if final_mpe_to_set is None:  # Still not found, use ultimate fallback
+        final_mpe_to_set = 128_000
+
+    config_dict["max_position_embeddings"] = final_mpe_to_set
 
     if config_dict.get("quantization") is not None:
         quantization = config_dict.get("quantization", {})

From 7c2e2d9ab51e1a34ca89e24fac10c2a73d320e88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Tue, 13 May 2025 11:45:11 +0800
Subject: [PATCH 3/8] Revert "Update config.py"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 14c9116eb24cd25aa44369cedaeba8d2521f8916.

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 51 ++-----------------------------
 1 file changed, 3 insertions(+), 48 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 2fbd996dbb0b..f6c2b35535b6 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -686,54 +686,9 @@ def recurse_elems(elem: Any):
     config_dict["hidden_act"] = config_dict.get("activation", "silu")
     config_dict["tie_word_embeddings"] = config_dict.get(
         "tie_embeddings", False)
-    # Check if max_position_embeddings is in params.json
-    mpe_from_params = config_dict.get("max_position_embeddings")
-    final_mpe_to_set = mpe_from_params
-
-    if final_mpe_to_set is None:
-        # Not found in params.json, try to get from standard HF AutoConfig
-        hf_config_for_defaults = None
-        try:
-            trust_remote_code_val = kwargs.get("trust_remote_code", False)
-            token_val = kwargs.get("token")  # Passed from get_config
-
-            hf_config_for_defaults = AutoConfig.from_pretrained(
-                model,
-                revision=revision,
-                trust_remote_code=trust_remote_code_val,
-                token=token_val)
-        except Exception as e:
-            error_message = (
-                "Invalid repository ID or local directory specified:"
-                " '{model}'.\nPlease verify the following requirements:\n"
-                "1. Provide a valid Hugging Face repository ID.\n"
-                "2. Specify a local directory that contains a recognized "
-                "configuration file.\n").format(model=model)
-
-            raise ValueError(error_message) from e
-
-        if hf_config_for_defaults:
-            # Try to get from text_config first, then top-level
-            mpe_from_hf_config = None
-            text_config_obj = getattr(hf_config_for_defaults, "text_config",
-                                      None)
-            if text_config_obj and hasattr(text_config_obj,
-                                           "max_position_embeddings"):
-                mpe_from_hf_config = getattr(text_config_obj,
-                                             "max_position_embeddings", None)
-
-            if mpe_from_hf_config is None and hasattr(
-                    hf_config_for_defaults, "max_position_embeddings"):
-                mpe_from_hf_config = getattr(hf_config_for_defaults,
-                                             "max_position_embeddings", None)
-
-            if mpe_from_hf_config is not None:
-                final_mpe_to_set = mpe_from_hf_config
-
-    if final_mpe_to_set is None:  # Still not found, use ultimate fallback
-        final_mpe_to_set = 128_000
-
-    config_dict["max_position_embeddings"] = final_mpe_to_set
+    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
+    config_dict["max_position_embeddings"] = config_dict.get(
+        "max_position_embeddings", 128_000)
 
     if config_dict.get("quantization") is not None:
         quantization = config_dict.get("quantization", {})

From 239a8530f8021e7cc63bb10982f391ff7c879598 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Tue, 13 May 2025 11:47:01 +0800
Subject: [PATCH 4/8] Revert "fix broken test vllm:test_kernels - test_attention_selector.py::test_flash_attn (#17873)"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit bed409e0ce8ec3f2fec70d1cd9ffb029d80b16f4.

Signed-off-by: 汪志鹏
---
 tests/kernels/attention/test_attention_selector.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 58da01f0ebbf..436cb430817e 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -188,9 +188,8 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
         m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL)
 
         # Unsupported CUDA arch
-        monkeypatch.setattr(torch.cuda,
-                            "get_device_capability",
-                            lambda _=None: (7, 5))
+        monkeypatch.setattr(torch.cuda, "get_device_capability", lambda:
+                            (7, 5))
 
         backend = get_attn_backend(16, torch.float16, None, 16, False)
         assert backend.get_name() != STR_FLASH_ATTN_VAL

From d07f67c91a606e7d2c884a81d63fe7f5776a373c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Tue, 13 May 2025 11:54:49 +0800
Subject: [PATCH 5/8] Update config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f6c2b35535b6..b630e4e92c54 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -686,9 +686,25 @@ def recurse_elems(elem: Any):
     config_dict["hidden_act"] = config_dict.get("activation", "silu")
     config_dict["tie_word_embeddings"] = config_dict.get(
         "tie_embeddings", False)
-    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
-    config_dict["max_position_embeddings"] = config_dict.get(
-        "max_position_embeddings", 128_000)
+
+    if config_dict.get("max_position_embeddings") is None:
+        max_position_embeddings = 128_000
+        try:
+            trust_remote_code_val = kwargs.get("trust_remote_code", False)
+            token_val = kwargs.get("token")
+            hf_config = AutoConfig.from_pretrained(
+                model,
+                revision=revision,
+                trust_remote_code=trust_remote_code_val,
+                token=token_val)
+            if hf_value := hf_config.get_text_config().max_position_embeddings:
+                max_position_embeddings = hf_value
+        except Exception:
+            warning_message = ("Could not read 'max_position_embeddings' "
+                               "from the config for model: "
+                               "'{model}'.\n").format(model=model)
+            logger.warning(warning_message)
+        config_dict["max_position_embeddings"] = max_position_embeddings
 
     if config_dict.get("quantization") is not None:
         quantization = config_dict.get("quantization", {})

From c901b70ef3bbda95cc9aed2921ef32d7e44685c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?=
Date: Wed, 14 May 2025 13:02:19 +0800
Subject: [PATCH 6/8] Update config.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 汪志鹏
---
 vllm/transformers_utils/config.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index b630e4e92c54..310003ce6941 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -699,11 +699,12 @@ def recurse_elems(elem: Any):
                 token=token_val)
             if hf_value := hf_config.get_text_config().max_position_embeddings:
                 max_position_embeddings = hf_value
-        except Exception:
-            warning_message = ("Could not read 'max_position_embeddings' "
-                               "from the config for model: "
-                               "'{model}'.\n").format(model=model)
-            logger.warning(warning_message)
+        except Exception as e:
+            logger.warning(
+                "The params.json file is missing 'max_position_embeddings'"
+                " and could not get a value from the HF config."
+ " Defaulting to 128000", + exc_info=e) config_dict["max_position_embeddings"] = max_position_embeddings if config_dict.get("quantization") is not None: From a5993fc9dbbbc9d915292abb956e72a51c6870d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?= Date: Thu, 15 May 2025 10:06:02 +0800 Subject: [PATCH 7/8] Update config.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪志鹏 --- vllm/transformers_utils/config.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 310003ce6941..215b5c858006 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -691,12 +691,11 @@ def recurse_elems(elem: Any): max_position_embeddings = 128_000 try: trust_remote_code_val = kwargs.get("trust_remote_code", False) - token_val = kwargs.get("token") - hf_config = AutoConfig.from_pretrained( - model, - revision=revision, + hf_config = get_config( + model=model, trust_remote_code=trust_remote_code_val, - token=token_val) + revision=revision, + config_format=ConfigFormat.HF) if hf_value := hf_config.get_text_config().max_position_embeddings: max_position_embeddings = hf_value except Exception as e: From 6acb3199268ac9d256703233373196dff75f5245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E5=BF=97=E9=B9=8F?= Date: Thu, 15 May 2025 10:24:02 +0800 Subject: [PATCH 8/8] Update config.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪志鹏 --- vllm/transformers_utils/config.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 215b5c858006..d7836c260587 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -691,11 +691,10 @@ def recurse_elems(elem: Any): max_position_embeddings = 128_000 try: trust_remote_code_val = kwargs.get("trust_remote_code", False) - hf_config = get_config( - model=model, - trust_remote_code=trust_remote_code_val, - revision=revision, - config_format=ConfigFormat.HF) + hf_config = get_config(model=model, + trust_remote_code=trust_remote_code_val, + revision=revision, + config_format=ConfigFormat.HF) if hf_value := hf_config.get_text_config().max_position_embeddings: max_position_embeddings = hf_value except Exception as e: