diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 90f0944a7f3de..ebe5e0fd34135 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -189,6 +189,10 @@ def __init__(
 
         self._verify_args()
         if self.use_beam_search:
+            # Lazy import to avoid circular imports.
+            from vllm.usage.usage_lib import set_runtime_usage_data
+            set_runtime_usage_data("use_beam_search", True)
+
             if not envs.VLLM_NO_DEPRECATION_WARNING:
                 logger.warning(
                     "[IMPORTANT] We plan to discontinue the support for beam "
@@ -196,6 +200,7 @@ def __init__(
                     "https://github.com/vllm-project/vllm/issues/6226 for "
                     "more information. Set VLLM_NO_DEPRECATION_WARNING=1 to "
                     "suppress this warning.")
+
             self._verify_beam_search()
         else:
             self._verify_non_beam_search()
diff --git a/vllm/usage/usage_lib.py b/vllm/usage/usage_lib.py
index afb3007a528b4..6907d8b9becd2 100644
--- a/vllm/usage/usage_lib.py
+++ b/vllm/usage/usage_lib.py
@@ -7,7 +7,7 @@
 from enum import Enum
 from pathlib import Path
 from threading import Thread
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from uuid import uuid4
 
 import cpuinfo
@@ -25,6 +25,13 @@
 _USAGE_STATS_ENABLED = None
 _USAGE_STATS_SERVER = envs.VLLM_USAGE_STATS_SERVER
 
+_GLOBAL_RUNTIME_DATA: Dict[str, Union[str, int, bool]] = {}
+
+
+def set_runtime_usage_data(key: str, value: Union[str, int, bool]) -> None:
+    """Set global usage data that will be sent with every usage heartbeat."""
+    _GLOBAL_RUNTIME_DATA[key] = value
+
 
 def is_usage_stats_enabled():
     """Determine whether or not we can send usage stats to the server.
@@ -187,7 +194,11 @@ def _report_continous_usage(self):
         """
         while True:
             time.sleep(600)
-            data = {"uuid": self.uuid, "log_time": _get_current_timestamp_ns()}
+            data = {
+                "uuid": self.uuid,
+                "log_time": _get_current_timestamp_ns(),
+            }
+            data.update(_GLOBAL_RUNTIME_DATA)
 
             self._write_to_file(data)
             self._send_to_server(data)