
[Frontend]-config-cli-args #7737

Merged: 34 commits, Aug 30, 2024
0d304d7
[Frontend]-config-cli-args
KaunilD Aug 21, 2024
f36dc39
Update vllm/scripts.py
KaunilD Aug 21, 2024
2bca2fa
[Frontend]-config-cli-args
KaunilD Aug 21, 2024
ab570d1
[Frontend]-config-cli-args updated docs
KaunilD Aug 21, 2024
7bfc6cb
[Frontend]-config-cli-args updated docs
KaunilD Aug 22, 2024
1779536
Update docs/source/serving/openai_compatible_server.md
KaunilD Aug 23, 2024
ff93954
[Frontend]-config-cli-args integrated configargparse
Aug 23, 2024
a9492c4
[Frontend]-config-cli-args removed comfig.yaml
Aug 23, 2024
05164e0
[Frontend]-config-cli-args removed comfig.yaml
Aug 23, 2024
7014a4e
[Frontend]-config-cli-args renamed function signature
Aug 23, 2024
63413aa
[Frontend]-config-cli-args formattting
Aug 23, 2024
4d6f930
[Frontend]-config-cli-args added native support
Aug 26, 2024
0d41c4c
[Frontend]-config-cli-args added native support
Aug 26, 2024
8d84671
[Frontend]-config-cli-args added native support
Aug 26, 2024
c5af059
[Frontend]-config-cli-args added native support
Aug 26, 2024
f6529e3
[Frontend]-config-cli-args added native support
Aug 26, 2024
6e1fe11
[Frontend]-config-cli-args added tests
Aug 26, 2024
175a0d5
[Frontend]-config-cli-args added tests
Aug 26, 2024
7c06e17
[Frontend]-config-cli-args added tests
Aug 26, 2024
98208b3
[Frontend]-config-cli-args added tests
Aug 26, 2024
3d72a70
[Frontend]-config-cli-args updated tests
KaunilD Aug 27, 2024
10054a4
[Frontend]-config-cli-args updated tests
KaunilD Aug 27, 2024
b87593b
[Frontend]-config-cli-args updated tests
KaunilD Aug 27, 2024
56a7054
[Frontend]-config-cli-args thinned diff
KaunilD Aug 27, 2024
2c7df07
[Frontend]-config-cli-args thinned diff
KaunilD Aug 27, 2024
7b77458
Merge branch 'main' into kaunild/frontend/config-cli-args
KaunilD Aug 27, 2024
a5b1a3a
[Frontend]-config-cli-args updated tests
KaunilD Aug 27, 2024
d189970
Update vllm/utils.py
KaunilD Aug 30, 2024
ae178fb
Update vllm/utils.py
KaunilD Aug 30, 2024
0c1b302
Update docs/source/serving/openai_compatible_server.md
KaunilD Aug 30, 2024
743aee5
Merge branch 'main' into kaunild/frontend/config-cli-args
KaunilD Aug 30, 2024
295f675
[Frontend]-config-cli-args
Aug 30, 2024
960b047
Merge branch 'kaunild/frontend/config-cli-args' of github.com:KaunilD…
Aug 30, 2024
b6f130d
[Frontend]-config-cli-args
Aug 30, 2024
3 changes: 2 additions & 1 deletion docs/requirements-docs.txt
@@ -11,5 +11,6 @@ pydantic >= 2.8
torch
py-cpuinfo
transformers
mistral_common >= 1.3.4
openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args
26 changes: 26 additions & 0 deletions docs/source/serving/openai_compatible_server.md
@@ -111,6 +111,32 @@ directory [here](https://github.com/vllm-project/vllm/tree/main/examples/)
:prog: vllm serve
```

### Config file

The `serve` module can also accept arguments from a config file in
`yaml` format. The arguments in the YAML file must be specified using the
long form of the argument, as outlined [here](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server).

For example:

```yaml
# config.yaml

host: "127.0.0.1"
port: 6379
uvicorn-log-level: "info"
```
```bash
$ vllm serve SOME_MODEL --config config.yaml
```
---
**NOTE**
When an argument is supplied both on the command line and in the config file, the command-line value takes precedence.
The order of priority is `command line > config file values > defaults`.

---
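The precedence described above follows from argparse's last-wins behavior for repeated flags: config-derived arguments are inserted before the user's command-line arguments, so any flag repeated on the command line wins. A minimal standalone sketch of that mechanism (plain `argparse`, not vLLM code):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int, default=8000)

# Config-derived args are placed before CLI args; for a repeated flag,
# argparse keeps the last occurrence, so the CLI value wins.
config_args = ['--port', '6379']  # as if read from config.yaml
cli_args = ['--port', '9000']     # as if typed on the command line
args = parser.parse_args(config_args + cli_args)
print(args.port)  # 9000: command line > config file values > defaults
```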

## Tool calling in the chat completion API
vLLM supports only named function calling in the chat completion API. The `tool_choice` options `auto` and `required` are **not yet supported** but are on the roadmap.

1 change: 1 addition & 0 deletions requirements-common.txt
@@ -27,3 +27,4 @@ soundfile # Required for audio processing
gguf == 0.9.1
importlib_metadata
mistral_common >= 1.3.4
pyyaml
2 changes: 2 additions & 0 deletions tests/data/test_config.yaml
@@ -0,0 +1,2 @@
port: 12312
tensor_parallel_size: 2
44 changes: 44 additions & 0 deletions tests/test_utils.py
@@ -132,6 +132,16 @@ def parser():
    return parser


@pytest.fixture
def parser_with_config():
    parser = FlexibleArgumentParser()
    parser.add_argument('serve')
    parser.add_argument('--config', type=str)
    parser.add_argument('--port', type=int)
    parser.add_argument('--tensor-parallel-size', type=int)
    return parser


def test_underscore_to_dash(parser):
    args = parser.parse_args(['--image_input_type', 'pixel_values'])
    assert args.image_input_type == 'pixel_values'
@@ -176,3 +186,37 @@ def test_missing_required_argument(parser):
    parser.add_argument('--required-arg', required=True)
    with pytest.raises(SystemExit):
        parser.parse_args([])


def test_cli_override_to_config(parser_with_config):
    args = parser_with_config.parse_args([
        'serve', '--config', './data/test_config.yaml',
        '--tensor-parallel-size', '3'
    ])
    assert args.tensor_parallel_size == 3
    args = parser_with_config.parse_args([
        'serve', '--tensor-parallel-size', '3', '--config',
        './data/test_config.yaml'
    ])
    assert args.tensor_parallel_size == 3


def test_config_args(parser_with_config):
    args = parser_with_config.parse_args(
        ['serve', '--config', './data/test_config.yaml'])
    assert args.tensor_parallel_size == 2


def test_config_file(parser_with_config):
    with pytest.raises(FileNotFoundError):
        parser_with_config.parse_args(['serve', '--config', 'test_config.yml'])

    with pytest.raises(ValueError):
        parser_with_config.parse_args(
            ['serve', '--config', './data/test_config.json'])

    with pytest.raises(ValueError):
        parser_with_config.parse_args([
            'serve', '--tensor-parallel-size', '3', '--config', '--batch-size',
            '32'
        ])
9 changes: 9 additions & 0 deletions vllm/scripts.py
@@ -125,6 +125,15 @@ def main():
    serve_parser.add_argument("model_tag",
                              type=str,
                              help="The model tag to serve")
    serve_parser.add_argument(
        "--config",
        type=str,
        default='',
        required=False,
        help="Read CLI options from a config file. "
        "Must be a YAML with the following options: "
        "https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server"
    )
    serve_parser = make_arg_parser(serve_parser)
    serve_parser.set_defaults(dispatch_function=serve)

101 changes: 101 additions & 0 deletions vllm/utils.py
@@ -25,6 +25,7 @@
import psutil
import torch
import torch.types
import yaml
**Member:** Since this isn't in the standard library, we should add it to requirements-common.txt.

**Contributor Author:** good catch!
from packaging.version import Version
from typing_extensions import ParamSpec, TypeIs, assert_never

@@ -1093,6 +1094,9 @@ def parse_args(self, args=None, namespace=None):
        if args is None:
            args = sys.argv[1:]

        if '--config' in args:
            args = FlexibleArgumentParser._pull_args_from_config(args)

        # Convert underscores to dashes and vice versa in argument names
        processed_args = []
        for arg in args:
@@ -1109,6 +1113,103 @@

        return super().parse_args(processed_args, namespace)
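The underscore/dash conversion mentioned in the hunk above can be illustrated in isolation. This is a simplified, hypothetical stand-in (one direction only, `_` to `-`), not the `FlexibleArgumentParser` implementation itself:

```python
def normalize(arg: str) -> str:
    # '--tensor_parallel_size=4' -> '--tensor-parallel-size=4'.
    # Only the option name is rewritten; any '=value' part is preserved.
    if arg.startswith('--'):
        key, sep, value = arg.partition('=')
        return '--' + key[2:].replace('_', '-') + sep + value
    return arg

print(normalize('--tensor_parallel_size=4'))  # --tensor-parallel-size=4
print(normalize('-tp'))                       # -tp (short flags untouched)
```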

    @staticmethod
    def _pull_args_from_config(args: List[str]) -> List[str]:
        """Method to pull arguments specified in the config file
        into the command-line args variable.

        The arguments from the config file are inserted between the
        subcommand and the remaining command-line arguments.

        Example:
        ```yaml
        # config.yaml
        port: 12323
        tensor-parallel-size: 4
        ```
        ```bash
        $ vllm serve "facebook/opt-12B" --config config.yaml -tp 2
        ```
        yields, before expansion:
            args = ['serve', 'facebook/opt-12B',
                    '--config', 'config.yaml', '-tp', '2']
        and after expansion:
            args = ['serve',
                    '--port', '12323', '--tensor-parallel-size', '4',
                    'facebook/opt-12B', '-tp', '2']

        Note how the config args are inserted directly after the
        subcommand: this way the order of priorities
        (cli > config > defaults) is maintained when the args are
        parsed by super().
        """
        assert args.count(
            '--config') <= 1, "More than one config file specified!"

        index = args.index('--config')
        if index == len(args) - 1:
            raise ValueError("No config file specified! "
                             "Please check your command-line arguments.")

        file_path = args[index + 1]

        config_args = FlexibleArgumentParser._load_config_file(file_path)

        # 0th index is for {serve,chat,complete},
        # followed by config args,
        # followed by the rest of the cli args.
        # Maintaining this order enforces the precedence
        # of cli > config > defaults.
        args = [args[0]] + config_args + args[1:index] + args[index + 2:]

        return args
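The list surgery above can be sketched as a standalone function. This is a simplified stand-in, not the vLLM method: it assumes `--config` is present exactly once and takes the already-loaded config tokens as a parameter instead of reading a file:

```python
from typing import List

def pull_args_from_config(args: List[str],
                          config_args: List[str]) -> List[str]:
    # Keep the subcommand first, then config args, then the remaining
    # CLI args, so argparse's last-wins rule gives the CLI precedence.
    index = args.index('--config')
    return [args[0]] + config_args + args[1:index] + args[index + 2:]

merged = pull_args_from_config(
    ['serve', 'facebook/opt-125m', '--config', 'config.yaml', '-tp', '2'],
    ['--port', '12323', '--tensor-parallel-size', '4'])
print(merged)
# ['serve', '--port', '12323', '--tensor-parallel-size', '4',
#  'facebook/opt-125m', '-tp', '2']
```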

    @staticmethod
    def _load_config_file(file_path: str) -> List[str]:
        """Loads a yaml file and returns the key-value pairs as a
        flattened list with an argparse-like pattern:
        ```yaml
        port: 12323
        tensor-parallel-size: 4
        ```
        returns:
            processed_args: List[str] = [
                '--port', '12323',
                '--tensor-parallel-size', '4'
            ]
        """

        extension: str = file_path.split('.')[-1]
        if extension not in ('yaml', 'yml'):
            raise ValueError(
                "Config file must be of a yaml/yml type. "
                f"{extension} supplied")

        # only expecting a flat dictionary of atomic types
        processed_args: List[str] = []

        config: Dict[str, Union[int, str]] = {}
        try:
            with open(file_path, 'r') as config_file:
                config = yaml.safe_load(config_file)
        except Exception as ex:
            logger.error(
                "Unable to read the config file at %s. "
                "Make sure path is correct", file_path)
            raise ex

        for key, value in config.items():
            processed_args.append('--' + key)
            processed_args.append(str(value))

        return processed_args
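The flattening step at the heart of the method above is just a dict-to-token transformation, sketched here without the file I/O and extension checks (a hypothetical helper name, not part of vLLM):

```python
from typing import Dict, List, Union

def flatten_config(config: Dict[str, Union[int, str]]) -> List[str]:
    # One flat dict of atomic values -> argparse-style token list.
    processed: List[str] = []
    for key, value in config.items():
        processed.append('--' + key)
        processed.append(str(value))
    return processed

tokens = flatten_config({'port': 12323, 'tensor-parallel-size': 4})
print(tokens)  # ['--port', '12323', '--tensor-parallel-size', '4']
```

Since dicts preserve insertion order in Python 3.7+, the tokens come out in the same order as the keys in the YAML file.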


async def _run_task_with_lock(task: Callable, lock: asyncio.Lock, *args,
                              **kwargs):