File tree (expand / collapse): 4 files changed, +13 -0 lines changed
_internal/serve/deployments/llm/vllm
cpu/deployments/llm/multiplex
File tree (expand / collapse): 4 files changed, +13 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -919,6 +919,9 @@ def _parse_sampling_params(
919919 frequency_penalty = sampling_params .frequency_penalty
920920 if sampling_params .frequency_penalty is not None
921921 else 0.0 ,
922+ repetition_penalty = sampling_params .repetition_penalty
923+ if sampling_params .repetition_penalty is not None
924+ else 1.0 ,
922925 temperature = sampling_params .temperature
923926 if sampling_params .temperature is not None
924927 else 1.0 ,
Original file line number | Diff line number | Diff line change
@@ -211,11 +211,16 @@ class VLLMSamplingParams(SamplingParams):
211211 Args:
212212 top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering.
213213 seed: Seed for deterministic sampling with temperature>0.
214+ repetition_penalty: Float that penalizes new tokens based on whether they
215+ appear in the prompt and the generated text so far. Values > 1 encourage
216+ the model to use new tokens, while values < 1 encourage the model to repeat
217+ tokens.
214218 """
215219
216220 _ignored_fields = {"best_of" , "n" , "logit_bias" }
217221
218222 top_k : Optional [int ] = None
223+ repetition_penalty : Optional [float ] = None
219224 seed : Optional [int ] = None
220225
221226
Original file line number | Diff line number | Diff line change
7474 "ignore_eos" : None ,
7575 "presence_penalty" : None ,
7676 "frequency_penalty" : None ,
77+ "repetition_penalty" : None ,
7778 "best_of" : 1 ,
7879 "response_format" : None ,
7980 "top_k" : None ,
@@ -177,6 +178,7 @@ async def test_multiplex_deployment(
177178 "ignore_eos" : None ,
178179 "presence_penalty" : None ,
179180 "frequency_penalty" : None ,
181+ "repetition_penalty" : None ,
180182 "top_k" : None ,
181183 "response_format" : None ,
182184 "logprobs" : None ,
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,9 @@ def _parse_sampling_params(
162162 frequency_penalty = sampling_params .frequency_penalty
163163 if sampling_params .frequency_penalty is not None
164164 else 0.0 ,
165+ repetition_penalty = sampling_params .repetition_penalty
166+ if sampling_params .repetition_penalty is not None
167+ else 1.0 ,
165168 temperature = sampling_params .temperature
166169 if sampling_params .temperature is not None
167170 else 1.0 ,
You can’t perform that action at this time.
0 commit comments