|
20 | 20 | from vllm.executor.executor_base import ExecutorBase |
21 | 21 | from vllm.logger import init_logger |
22 | 22 | from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS |
| 23 | +from vllm.plugins import load_general_plugins |
23 | 24 | from vllm.transformers_utils.utils import check_gguf_file |
24 | 25 | from vllm.usage.usage_lib import UsageContext |
25 | 26 | from vllm.utils import FlexibleArgumentParser, StoreBoolean |
@@ -203,6 +204,8 @@ class EngineArgs: |
203 | 204 |
|
204 | 205 | calculate_kv_scales: Optional[bool] = None |
205 | 206 |
|
| 207 | + additional_config: Optional[Dict[str, Any]] = None |
| 208 | + |
206 | 209 | def __post_init__(self): |
207 | 210 | if not self.tokenizer: |
208 | 211 | self.tokenizer = self.model |
@@ -984,6 +987,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: |
984 | 987 | 'be loaded from the model checkpoint if available. ' |
985 | 988 | 'Otherwise, the scales will default to 1.0.') |
986 | 989 |
|
| 990 | + parser.add_argument( |
| 991 | + "--additional-config", |
| 992 | + type=json.loads, |
| 993 | + default=None, |
| 994 | + help="Additional config for specified platform in JSON format. " |
| 995 | + "Different platforms may support different configs. Make sure the " |
| 996 | + "configs are valid for the platform you are using. The input format" |
| 997 | + " is like '{\"config_key\":\"config_value\"}'") |
987 | 998 | return parser |
988 | 999 |
|
989 | 1000 | @classmethod |
@@ -1044,6 +1055,9 @@ def create_load_config(self) -> LoadConfig: |
1044 | 1055 | def create_engine_config(self, |
1045 | 1056 | usage_context: Optional[UsageContext] = None |
1046 | 1057 | ) -> VllmConfig: |
| 1058 | + from vllm.platforms import current_platform |
| 1059 | + current_platform.pre_register_and_update() |
| 1060 | + |
1047 | 1061 | if envs.VLLM_USE_V1: |
1048 | 1062 | self._override_v1_engine_args(usage_context) |
1049 | 1063 |
|
@@ -1287,6 +1301,7 @@ def create_engine_config(self, |
1287 | 1301 | prompt_adapter_config=prompt_adapter_config, |
1288 | 1302 | compilation_config=self.compilation_config, |
1289 | 1303 | kv_transfer_config=self.kv_transfer_config, |
| 1304 | + additional_config=self.additional_config, |
1290 | 1305 | ) |
1291 | 1306 |
|
1292 | 1307 | if envs.VLLM_USE_V1: |
@@ -1347,6 +1362,12 @@ def add_cli_args(parser: FlexibleArgumentParser, |
1347 | 1362 | parser.add_argument('--disable-log-requests', |
1348 | 1363 | action='store_true', |
1349 | 1364 | help='Disable logging requests.') |
| 1365 | + # Initialize plugins so they can update the parser. For example, a plugin |
| 1366 | + # may add a new quantization method to the --quantization argument or a |
| 1367 | + # new device to the --device argument. |
| 1368 | + load_general_plugins() |
| 1369 | + from vllm.platforms import current_platform |
| 1370 | + current_platform.pre_register_and_update(parser) |
1350 | 1371 | return parser |
1351 | 1372 |
|
1352 | 1373 |
|
|
0 commit comments