From 75a2f14ea5d4d280676aa27bd40c4e8a49bfcc7f Mon Sep 17 00:00:00 2001
From: Nick Hill <nhill@redhat.com>
Date: Fri, 13 Jun 2025 14:37:55 -0700
Subject: [PATCH] [BugFix] Don't catch BaseException when dumping execute_model
 errors

dump_engine_exception is intended to dump pertinent state for diagnostic purposes when model_executor.execute_model() raises an error.

We should catch Exception rather than BaseException here, because a BaseException that is not an Exception (such as `SystemExit` or `KeyboardInterrupt`) indicates an external signal rather than an error from execute_model itself.

Catching SystemExit here has led to more confusing behaviour when diagnosing problems.

Signed-off-by: Nick Hill <nhill@redhat.com>
---
 vllm/logging_utils/dump_input.py | 18 +++++++-----------
 vllm/v1/engine/core.py           |  7 +++++--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/vllm/logging_utils/dump_input.py b/vllm/logging_utils/dump_input.py
index d14515f56e54..ad89638e1061 100644
--- a/vllm/logging_utils/dump_input.py
+++ b/vllm/logging_utils/dump_input.py
@@ -59,27 +59,23 @@ def dump_engine_exception(config: VllmConfig,
                           scheduler_stats: Optional[SchedulerStats]):
     # NOTE: ensure we can log extra info without risking raises
     # unexpected errors during logging
-    with contextlib.suppress(BaseException):
+    with contextlib.suppress(Exception):
         _dump_engine_exception(config, scheduler_output, scheduler_stats)
 
 
 def _dump_engine_exception(config: VllmConfig,
                            scheduler_output: SchedulerOutput,
                            scheduler_stats: Optional[SchedulerStats]):
-    logger.error("Dumping input data")
-
     logger.error(
-        "V1 LLM engine (v%s) with config: %s, ",
+        "Dumping input data for V1 LLM engine (v%s) with config: %s, ",
         VLLM_VERSION,
         config,
     )
-
     try:
         dump_obj = prepare_object_to_dump(scheduler_output)
-        logger.error("Dumping scheduler output for model execution:")
-        logger.error(dump_obj)
+        logger.error("Dumping scheduler output for model execution: %s",
+                     dump_obj)
         if scheduler_stats:
-            logger.error(scheduler_stats)
-    except BaseException as exception:
-        logger.error("Error preparing object to dump")
-        logger.error(repr(exception))
+            logger.error("Dumping scheduler stats: %s", scheduler_stats)
+    except Exception:
+        logger.exception("Error preparing object to dump")
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index f36a491a1970..07761bf000a6 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -209,11 +209,14 @@ def abort_requests(self, request_ids: list[str]):
     def execute_model(self, scheduler_output: SchedulerOutput):
         try:
             return self.model_executor.execute_model(scheduler_output)
-        except BaseException as err:
+        except Exception as err:
+            # We do not want to catch BaseException here since we're only
+            # interested in dumping info when the exception is due to an
+            # error from execute_model itself.
+
             # NOTE: This method is exception-free
             dump_engine_exception(self.vllm_config, scheduler_output,
                                   self.scheduler.make_stats())
-            # Re-raise exception
             raise err
 
     def step(self) -> tuple[dict[int, EngineCoreOutputs], bool]: