Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions vllm/v1/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,14 @@ class ModelRunnerOutput:
prompt_logprobs_dict={},
pooler_output=[],
num_nans_in_logits=None)

# Empty runner output that additionally carries a default-constructed
# KVConnectorOutput. This is a single module-level instance: the no-forward
# paths return it as-is when no KV transfer has finished, and otherwise take
# a copy.copy() of it before assigning their own kv_connector_output, so the
# shared instance itself should not be mutated in place.
EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT = ModelRunnerOutput(
    req_ids=[],
    req_id_to_index={},
    sampled_token_ids=[],
    spec_token_ids=None,
    logprobs=None,
    prompt_logprobs_dict={},
    pooler_output=[],
    num_nans_in_logits=None,
    kv_connector_output=KVConnectorOutput(),
)
7 changes: 4 additions & 3 deletions vllm/v1/worker/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from vllm.utils import GiB_bytes, MemorySnapshot, memory_profiling
from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, ModelRunnerOutput
from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT,
ModelRunnerOutput)
from vllm.v1.utils import report_usage_stats
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
from vllm.v1.worker.worker_base import WorkerBase
Expand Down Expand Up @@ -377,9 +378,9 @@ def execute_model(
# kv_connector_output
if (not kv_connector_output.finished_sending
and not kv_connector_output.finished_recving):
return EMPTY_MODEL_RUNNER_OUTPUT
return EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT

output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
output = copy.copy(EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT)
output.kv_connector_output = kv_connector_output
return output

Expand Down
8 changes: 4 additions & 4 deletions vllm/v1/worker/kv_connector_model_runner_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
from vllm.forward_context import get_forward_context, set_forward_context
from vllm.logger import init_logger
from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput,
ModelRunnerOutput)
from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT,
KVConnectorOutput, ModelRunnerOutput)

if TYPE_CHECKING:
from vllm.v1.core.sched.output import SchedulerOutput
Expand Down Expand Up @@ -68,9 +68,9 @@ def kv_connector_no_forward(scheduler_output: "SchedulerOutput",

if (not kv_connector_output.finished_sending
and not kv_connector_output.finished_recving):
return EMPTY_MODEL_RUNNER_OUTPUT
return EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT

output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
output = copy.copy(EMPTY_MODEL_RUNNER_WITH_KVC_OUTPUT)
output.kv_connector_output = kv_connector_output
return output

Expand Down