diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 95a9be7806633..c358e23b6a37a 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -11,5 +11,6 @@ pydantic >= 2.8 torch py-cpuinfo transformers -mistral_common >= 1.3.4 openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args +mistral_common >= 1.3.4 +openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args \ No newline at end of file diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index a06c30d9c48c6..b2acde390083c 100644 --- a/docs/source/serving/openai_compatible_server.md +++ b/docs/source/serving/openai_compatible_server.md @@ -111,6 +111,32 @@ directory [here](https://github.com/vllm-project/vllm/tree/main/examples/) :prog: vllm serve ``` +### Config file + +The `serve` module can also accept arguments from a config file in +`yaml` format. The arguments in the yaml must be specified using the +long form of the argument outlined [here](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server): + +For example: + +```yaml +# config.yaml + +host: "127.0.0.1" +port: 6379 +uvicorn-log-level: "info" +``` + +```bash +$ vllm serve SOME_MODEL --config config.yaml +``` +--- +**NOTE** +In case an argument is supplied using command line and the config file, the value from the commandline will take precedence. +The order of priorities is `command line > config file values > defaults`. + +--- + ## Tool calling in the chat completion API vLLM supports only named function calling in the chat completion API. The `tool_choice` options `auto` and `required` are **not yet supported** but on the roadmap. diff --git a/requirements-common.txt b/requirements-common.txt index 61daf99819756..d7e10c7591a79 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -27,3 +27,4 @@ soundfile # Required for audio processing gguf == 0.9.1 importlib_metadata mistral_common >= 1.3.4 +pyyaml \ No newline at end of file diff --git a/tests/data/test_config.yaml b/tests/data/test_config.yaml new file mode 100644 index 0000000000000..20d499624de2e --- /dev/null +++ b/tests/data/test_config.yaml @@ -0,0 +1,2 @@ +port: 12312 +tensor_parallel_size: 2 diff --git a/tests/test_utils.py b/tests/test_utils.py index c157be1c08f81..c7cb663068c0f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -132,6 +132,16 @@ def parser(): return parser +@pytest.fixture +def parser_with_config(): + parser = FlexibleArgumentParser() + parser.add_argument('serve') + parser.add_argument('--config', type=str) + parser.add_argument('--port', type=int) + parser.add_argument('--tensor-parallel-size', type=int) + return parser + + def test_underscore_to_dash(parser): args = parser.parse_args(['--image_input_type', 'pixel_values']) assert args.image_input_type == 'pixel_values' @@ -176,3 +186,37 @@ def test_missing_required_argument(parser): parser.add_argument('--required-arg', required=True) with pytest.raises(SystemExit): parser.parse_args([]) + + +def test_cli_override_to_config(parser_with_config): + args = parser_with_config.parse_args([ + 'serve', '--config', './data/test_config.yaml', + '--tensor-parallel-size', '3' + ]) + assert args.tensor_parallel_size == 3 + args = parser_with_config.parse_args([ + 'serve', '--tensor-parallel-size', '3', '--config', + './data/test_config.yaml' + ]) + assert args.tensor_parallel_size == 3 + + +def test_config_args(parser_with_config): + args = parser_with_config.parse_args( + ['serve', '--config', './data/test_config.yaml']) + assert args.tensor_parallel_size == 2 + + +def test_config_file(parser_with_config): + with pytest.raises(FileNotFoundError): + parser_with_config.parse_args(['serve', '--config', 'test_config.yml']) + + with pytest.raises(ValueError): + parser_with_config.parse_args( + ['serve', '--config', './data/test_config.json']) + + with pytest.raises(ValueError): + parser_with_config.parse_args([ + 'serve', '--tensor-parallel-size', '3', '--config', '--batch-size', + '32' + ]) diff --git a/vllm/scripts.py b/vllm/scripts.py index a9ddfcf864133..e557961a335bf 100644 --- a/vllm/scripts.py +++ b/vllm/scripts.py @@ -125,6 +125,15 @@ def main(): serve_parser.add_argument("model_tag", type=str, help="The model tag to serve") + serve_parser.add_argument( + "--config", + type=str, + default='', + required=False, + help="Read CLI options from a config file." + "Must be a YAML with the following options:" + "https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server" + ) serve_parser = make_arg_parser(serve_parser) serve_parser.set_defaults(dispatch_function=serve) diff --git a/vllm/utils.py b/vllm/utils.py index dab8e5fe04359..657a3ecef696d 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -25,6 +25,7 @@ import psutil import torch import torch.types +import yaml from packaging.version import Version from typing_extensions import ParamSpec, TypeIs, assert_never @@ -1093,6 +1094,9 @@ def parse_args(self, args=None, namespace=None): if args is None: args = sys.argv[1:] + if '--config' in args: + args = FlexibleArgumentParser._pull_args_from_config(args) + # Convert underscores to dashes and vice versa in argument names processed_args = [] for arg in args: @@ -1109,6 +1113,103 @@ def parse_args(self, args=None, namespace=None): return super().parse_args(processed_args, namespace) + @staticmethod + def _pull_args_from_config(args: List[str]) -> List[str]: + """Method to pull arguments specified in the config file + into the command-line args variable. + + The arguments in config file will be inserted between + the argument list. + + example: + ```yaml + port: 12323 + tensor-parallel-size: 4 + ``` + ```python + $: vllm {serve,chat,complete} "facebook/opt-12B" \ + --config config.yaml -tp 2 + $: args = [ + "serve,chat,complete", + "facebook/opt-12B", + '--config', 'config.yaml', + '-tp', '2' + ] + $: args = [ + "serve,chat,complete", + "facebook/opt-12B", + '--port', '12323', + '--tensor-parallel-size', '4', + '-tp', '2' + ] + ``` + + Please note how the config args are inserted after the sub command. + this way the order of priorities is maintained when these are args + parsed by super(). + """ + assert args.count( + '--config') <= 1, "More than one config file specified!" + + index = args.index('--config') + if index == len(args) - 1: + raise ValueError("No config file specified! \ + Please check your command-line arguments.") + + file_path = args[index + 1] + + config_args = FlexibleArgumentParser._load_config_file(file_path) + + # 0th index is for {serve,chat,complete} + # followed by config args + # followed by rest of cli args. + # maintaining this order will enforce the precedence + # of cli > config > defaults + args = [args[0]] + config_args + args[1:index] + args[index + 2:] + + return args + + @staticmethod + def _load_config_file(file_path: str) -> List[str]: + """Loads a yaml file and returns the key value pairs as a + flattened list with argparse like pattern + ```yaml + port: 12323 + tensor-parallel-size: 4 + ``` + returns: + processed_args: list[str] = [ + '--port': '12323', + '--tensor-parallel-size': '4' + ] + + """ + + extension: str = file_path.split('.')[-1] + if extension not in ('yaml', 'yml'): + raise ValueError( + "Config file must be of a yaml/yml type.\ + %s supplied", extension) + + # only expecting a flat dictionary of atomic types + processed_args: List[str] = [] + + config: Dict[str, Union[int, str]] = {} + try: + with open(file_path, 'r') as config_file: + config = yaml.safe_load(config_file) + except Exception as ex: + logger.error( + "Unable to read the config file at %s. \ + Make sure path is correct", file_path) + raise ex + + for key, value in config.items(): + processed_args.append('--' + key) + processed_args.append(str(value)) + + return processed_args + async def _run_task_with_lock(task: Callable, lock: asyncio.Lock, *args, **kwargs):