8989
# Error messages for encoder/decoder features that are not implemented yet.
# Each message is built from two adjacent literals; the trailing space on the
# first literal is what separates the two halves in the final string.
STR_NOT_IMPL_ENC_DEC_SWA = (
    "Sliding window attention for encoder/decoder models "
    "is not currently supported.")

STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = (
    "Prefix caching for encoder/decoder models "
    "is not currently supported.")

STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL = (
    "Chunked prefill for encoder/decoder models "
    "is not currently supported.")
101101
102102STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP = (
103103 "Models with logits_soft_cap "
@@ -752,7 +752,7 @@ def _generate_random_fp8(
752752 # to generate random data for fp8 data.
753753 # For example, s.11111.00 in fp8e5m2 format represents Inf.
754754 # | E4M3 | E5M2
755- #-----|-------------|-------------------
755+ # -----|-------------|-------------------
756756 # Inf | N/A | s.11111.00
757757 # NaN | s.1111.111 | s.11111.{01,10,11}
758758 from vllm import _custom_ops as ops
@@ -840,7 +840,6 @@ def create_kv_caches_with_random(
840840 seed : Optional [int ] = None ,
841841 device : Optional [str ] = "cuda" ,
842842) -> tuple [list [torch .Tensor ], list [torch .Tensor ]]:
843-
844843 if cache_dtype == "fp8" and head_size % 16 :
845844 raise ValueError (
846845 f"Does not support key cache of type fp8 with head_size { head_size } "
@@ -1205,7 +1204,6 @@ def deprecate_args(
12051204 is_deprecated : Union [bool , Callable [[], bool ]] = True ,
12061205 additional_message : Optional [str ] = None ,
12071206) -> Callable [[F ], F ]:
1208-
12091207 if not callable (is_deprecated ):
12101208 is_deprecated = partial (identity , is_deprecated )
12111209
@@ -1355,7 +1353,7 @@ def weak_bound(*args, **kwargs) -> None:
13551353 return weak_bound
13561354
13571355
1358- #From: https://stackoverflow.com/a/4104188/2749989
1356+ # From: https://stackoverflow.com/a/4104188/2749989
13591357def run_once (f : Callable [P , None ]) -> Callable [P , None ]:
13601358
13611359 def wrapper (* args : P .args , ** kwargs : P .kwargs ) -> None :
@@ -1474,7 +1472,7 @@ def repl(match: re.Match) -> str:
14741472
14751473 # Convert underscores to dashes and vice versa in argument names
14761474 processed_args = list [str ]()
1477- for arg in args :
1475+ for i , arg in enumerate ( args ) :
14781476 if arg .startswith ('--' ):
14791477 if '=' in arg :
14801478 key , value = arg .split ('=' , 1 )
@@ -1483,10 +1481,17 @@ def repl(match: re.Match) -> str:
14831481 else :
14841482 key = pattern .sub (repl , arg , count = 1 )
14851483 processed_args .append (key )
1486- elif arg .startswith ('-O' ) and arg != '-O' and len (arg ) == 2 :
1487- # allow -O flag to be used without space, e.g. -O3
1488- processed_args .append ('-O' )
1489- processed_args .append (arg [2 :])
1484+ elif arg .startswith ('-O' ) and arg != '-O' and arg [2 ] != '.' :
1485+ # allow -O flag to be used without space, e.g. -O3 or -Odecode
1486+ # -O.<...> handled later
1487+ # also handle -O=<level> here
1488+ level = arg [3 :] if arg [2 ] == '=' else arg [2 :]
1489+ processed_args .append (f'-O.level={ level } ' )
1490+ elif arg == '-O' and i + 1 < len (args ) and args [i + 1 ] in {
1491+ "0" , "1" , "2" , "3"
1492+ }:
1493+ # Convert -O <n> to -O.level <n>
1494+ processed_args .append ('-O.level' )
14901495 else :
14911496 processed_args .append (arg )
14921497
@@ -1504,27 +1509,44 @@ def create_nested_dict(keys: list[str], value: str) -> dict[str, Any]:
def recursive_dict_update(
    original: dict[str, Any],
    update: dict[str, Any],
) -> set[str]:
    """Recursively merge ``update`` into ``original`` in place.

    Merge rules, applied per key of ``update``:

    * dict onto dict: merged recursively.
    * list onto list: the new items are appended to the existing list.
    * anything else: the value in ``original`` is replaced (or created).

    Args:
        original: Dictionary that is mutated to receive the merged values.
        update: Dictionary whose entries are merged into ``original``.

    Returns:
        The keys whose existing value was overwritten. Keys overwritten
        inside nested dicts are reported as dotted paths (``"outer.inner"``).
        List extensions are not reported, since nothing is lost.
    """
    duplicates = set[str]()
    for k, v in update.items():
        existing = original.get(k)
        if isinstance(v, dict) and isinstance(existing, dict):
            # Descend, then qualify the nested keys with the current key so
            # the caller can tell where the collision happened.
            nested_duplicates = recursive_dict_update(existing, v)
            duplicates |= {f"{k}.{d}" for d in nested_duplicates}
        elif isinstance(v, list) and isinstance(existing, list):
            # Lists accumulate instead of replacing each other.
            original[k] += v
        else:
            # Plain overwrite; only count it when a value was already there.
            if k in original:
                duplicates.add(k)
            original[k] = v
    return duplicates
15141528
15151529 delete = set [int ]()
15161530 dict_args = defaultdict [str , dict [str , Any ]](dict )
1531+ duplicates = set [str ]()
15171532 for i , processed_arg in enumerate (processed_args ):
1518- if processed_arg .startswith ("--" ) and "." in processed_arg :
1533+ if i in delete : # skip if value from previous arg
1534+ continue
1535+
1536+ if processed_arg .startswith ("-" ) and "." in processed_arg :
15191537 if "=" in processed_arg :
15201538 processed_arg , value_str = processed_arg .split ("=" , 1 )
15211539 if "." not in processed_arg :
1522- # False positive, . was only in the value
1540+ # False positive, '.' was only in the value
15231541 continue
15241542 else :
15251543 value_str = processed_args [i + 1 ]
15261544 delete .add (i + 1 )
15271545
1546+ if processed_arg .endswith ("+" ):
1547+ processed_arg = processed_arg [:- 1 ]
1548+ value_str = json .dumps (list (value_str .split ("," )))
1549+
15281550 key , * keys = processed_arg .split ("." )
15291551 try :
15301552 value = json .loads (value_str )
@@ -1533,12 +1555,17 @@ def recursive_dict_update(
15331555
15341556 # Merge all values with the same key into a single dict
15351557 arg_dict = create_nested_dict (keys , value )
1536- recursive_dict_update (dict_args [key ], arg_dict )
1558+ arg_duplicates = recursive_dict_update (dict_args [key ],
1559+ arg_dict )
1560+ duplicates |= {f'{ key } .{ d } ' for d in arg_duplicates }
15371561 delete .add (i )
15381562 # Filter out the dict args we set to None
15391563 processed_args = [
15401564 a for i , a in enumerate (processed_args ) if i not in delete
15411565 ]
1566+ if duplicates :
1567+ logger .warning ("Found duplicate keys %s" , ", " .join (duplicates ))
1568+
15421569 # Add the dict args back as if they were originally passed as JSON
15431570 for dict_arg , dict_value in dict_args .items ():
15441571 processed_args .append (dict_arg )
@@ -2405,7 +2432,7 @@ def memory_profiling(
24052432 The increase of `torch.cuda.memory_stats()["allocated_bytes.all.peak"]` during profiling gives (b.).
24062433
24072434 The increase of `non_torch_memory` from creating the current vLLM instance until after profiling to get (c.).
2408- """ # noqa
2435+ """ # noqa
24092436 gc .collect ()
24102437 torch .cuda .empty_cache ()
24112438 torch .cuda .reset_peak_memory_stats ()
0 commit comments