@@ -89,16 +89,16 @@ def convert_config(cls, config: dict, target: Literal["prefill", "decode"]) -> d
8989 if target == "prefill" :
9090 # convert prefill worker into decode worker
9191 config ["spec" ]["services" ][
92- WORKER_COMPONENT_NAMES ["vllm" ].decode_worker
92+ WORKER_COMPONENT_NAMES ["vllm" ].decode_worker_k8s_name
9393 ] = config ["spec" ]["services" ][
94- WORKER_COMPONENT_NAMES ["vllm" ].prefill_worker
94+ WORKER_COMPONENT_NAMES ["vllm" ].prefill_worker_k8s_name
9595 ]
9696 del config ["spec" ]["services" ][
97- WORKER_COMPONENT_NAMES ["vllm" ].prefill_worker
97+ WORKER_COMPONENT_NAMES ["vllm" ].prefill_worker_k8s_name
9898 ]
9999
100100 args = config ["spec" ]["services" ][
101- WORKER_COMPONENT_NAMES ["vllm" ].decode_worker
101+ WORKER_COMPONENT_NAMES ["vllm" ].decode_worker_k8s_name
102102 ]["extraPodSpec" ]["mainContainer" ]["args" ]
103103
104104 args = break_arguments (args )
@@ -112,18 +112,18 @@ def convert_config(cls, config: dict, target: Literal["prefill", "decode"]) -> d
112112 if "--no-enable-prefix-caching" not in args :
113113 args = append_argument (args , "--no-enable-prefix-caching" )
114114
115- config ["spec" ]["services" ][WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker ][
116- "extraPodSpec"
117- ]["mainContainer" ]["args" ] = join_arguments (args )
115+ config ["spec" ]["services" ][
116+ WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker_k8s_name
117+ ]["extraPodSpec" ]["mainContainer" ]["args" ] = join_arguments (args )
118118
119119 elif target == "decode" :
120120 # delete prefill worker
121121 del config ["spec" ]["services" ][
122- WORKER_COMPONENT_NAMES ["vllm" ].prefill_worker
122+ WORKER_COMPONENT_NAMES ["vllm" ].prefill_worker_k8s_name
123123 ]
124124
125125 args = config ["spec" ]["services" ][
126- WORKER_COMPONENT_NAMES ["vllm" ].decode_worker
126+ WORKER_COMPONENT_NAMES ["vllm" ].decode_worker_k8s_name
127127 ]["extraPodSpec" ]["mainContainer" ]["args" ]
128128
129129 args = break_arguments (args )
@@ -134,13 +134,13 @@ def convert_config(cls, config: dict, target: Literal["prefill", "decode"]) -> d
134134 if "--no-enable-prefix-caching" in args :
135135 args .remove ("--no-enable-prefix-caching" )
136136
137- config ["spec" ]["services" ][WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker ][
138- "extraPodSpec"
139- ]["mainContainer" ]["args" ] = join_arguments (args )
137+ config ["spec" ]["services" ][
138+ WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker_k8s_name
139+ ]["extraPodSpec" ]["mainContainer" ]["args" ] = join_arguments (args )
140140
141141 # set num workers to 1
142142 decode_worker_config = config ["spec" ]["services" ][
143- WORKER_COMPONENT_NAMES ["vllm" ].decode_worker
143+ WORKER_COMPONENT_NAMES ["vllm" ].decode_worker_k8s_name
144144 ]
145145 decode_worker_config ["replicas" ] = 1
146146
@@ -150,16 +150,16 @@ def convert_config(cls, config: dict, target: Literal["prefill", "decode"]) -> d
150150 def set_config_tp_size (cls , config : dict , tp_size : int ):
151151 config = deepcopy (config )
152152
153- config ["spec" ]["services" ][WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker ][
154- "resources"
155- ]["requests" ]["gpu" ] = str (tp_size )
156- config ["spec" ]["services" ][WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker ][
157- "resources"
158- ]["limits" ]["gpu" ] = str (tp_size )
153+ config ["spec" ]["services" ][
154+ WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker_k8s_name
155+ ]["resources" ]["requests" ]["gpu" ] = str (tp_size )
156+ config ["spec" ]["services" ][
157+ WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker_k8s_name
158+ ]["resources" ]["limits" ]["gpu" ] = str (tp_size )
159159
160- args = config ["spec" ]["services" ][WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker ][
161- "extraPodSpec"
162- ]["mainContainer" ]["args" ]
160+ args = config ["spec" ]["services" ][
161+ WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker_k8s_name
162+ ]["extraPodSpec" ]["mainContainer" ]["args" ]
163163
164164 args = break_arguments (args )
165165
@@ -169,15 +169,15 @@ def set_config_tp_size(cls, config: dict, tp_size: int):
169169 except ValueError :
170170 args = append_argument (args , ["--tensor-parallel-size" , str (tp_size )])
171171
172- config ["spec" ]["services" ][WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker ][
173- "extraPodSpec"
174- ]["mainContainer" ]["args" ] = join_arguments (args )
172+ config ["spec" ]["services" ][
173+ WORKER_COMPONENT_NAMES [ "vllm" ]. decode_worker_k8s_name
174+ ]["extraPodSpec" ]["mainContainer" ]["args" ] = join_arguments (args )
175175
176176 return config
177177
178178 @classmethod
179179 def get_model_name (cls , config : dict ) -> str :
180- worker_name = WORKER_COMPONENT_NAMES ["vllm" ].decode_worker
180+ worker_name = WORKER_COMPONENT_NAMES ["vllm" ].decode_worker_k8s_name
181181 args = config ["spec" ]["services" ][worker_name ]["extraPodSpec" ]["mainContainer" ][
182182 "args"
183183 ]
0 commit comments