@@ -229,7 +229,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
229229 logit_bias : Optional [dict [str , float ]] = None
230230 logprobs : Optional [bool ] = False
231231 top_logprobs : Optional [int ] = 0
232- # TODO(#9845): remove max_tokens when field is removed from OpenAI API
233232 max_tokens : Optional [int ] = Field (
234233 default = None ,
235234 deprecated =
@@ -433,23 +432,10 @@ class ChatCompletionRequest(OpenAIBaseModel):
433432 }
434433
435434 def to_beam_search_params (
436- self ,
437- default_max_tokens : int ,
438- default_sampling_params : Optional [dict ] = None
439- ) -> BeamSearchParams :
440- # TODO(#9845): remove max_tokens when field is removed from OpenAI API
441- max_tokens = self .max_completion_tokens or self .max_tokens
435+ self , max_tokens : int ,
436+ default_sampling_params : dict ) -> BeamSearchParams :
442437
443- if default_sampling_params is None :
444- default_sampling_params = {}
445438 n = self .n if self .n is not None else 1
446-
447- # Use minimum of context window, user request & server limit.
448- max_tokens = min (
449- val for val in (default_max_tokens , max_tokens ,
450- default_sampling_params .get ("max_tokens" , None ))
451- if val is not None )
452-
453439 if (temperature := self .temperature ) is None :
454440 temperature = default_sampling_params .get (
455441 "temperature" , self ._DEFAULT_SAMPLING_PARAMS ["temperature" ])
@@ -465,21 +451,10 @@ def to_beam_search_params(
465451
466452 def to_sampling_params (
467453 self ,
468- default_max_tokens : int ,
454+ max_tokens : int ,
469455 logits_processor_pattern : Optional [str ],
470- default_sampling_params : Optional [ dict ] = None ,
456+ default_sampling_params : dict ,
471457 ) -> SamplingParams :
472- # TODO(#9845): remove max_tokens when field is removed from OpenAI API
473- max_tokens = self .max_completion_tokens or self .max_tokens
474-
475- if default_sampling_params is None :
476- default_sampling_params = {}
477-
478- # Use minimum of context window, user request & server limit.
479- max_tokens = min (
480- val for val in (default_max_tokens , max_tokens ,
481- default_sampling_params .get ("max_tokens" , None ))
482- if val is not None )
483458
484459 # Default parameters
485460 if (repetition_penalty := self .repetition_penalty ) is None :
@@ -898,22 +873,15 @@ class CompletionRequest(OpenAIBaseModel):
898873 }
899874
900875 def to_beam_search_params (
901- self ,
902- default_max_tokens : int ,
903- default_sampling_params : Optional [dict ] = None
876+ self ,
877+ max_tokens : int ,
878+ default_sampling_params : Optional [dict ] = None ,
904879 ) -> BeamSearchParams :
905- max_tokens = self .max_tokens
906880
907881 if default_sampling_params is None :
908882 default_sampling_params = {}
909883 n = self .n if self .n is not None else 1
910884
911- # Use minimum of context window, user request & server limit.
912- max_tokens = min (
913- val for val in (default_max_tokens , max_tokens ,
914- default_sampling_params .get ("max_tokens" , None ))
915- if val is not None )
916-
917885 if (temperature := self .temperature ) is None :
918886 temperature = default_sampling_params .get ("temperature" , 1.0 )
919887
@@ -928,21 +896,14 @@ def to_beam_search_params(
928896
929897 def to_sampling_params (
930898 self ,
931- default_max_tokens : int ,
899+ max_tokens : int ,
932900 logits_processor_pattern : Optional [str ],
933901 default_sampling_params : Optional [dict ] = None ,
934902 ) -> SamplingParams :
935- max_tokens = self .max_tokens
936903
937904 if default_sampling_params is None :
938905 default_sampling_params = {}
939906
940- # Use minimum of context window, user request & server limit.
941- max_tokens = min (
942- val for val in (default_max_tokens , max_tokens ,
943- default_sampling_params .get ("max_tokens" , None ))
944- if val is not None )
945-
946907 # Default parameters
947908 if (repetition_penalty := self .repetition_penalty ) is None :
948909 repetition_penalty = default_sampling_params .get (
@@ -1813,7 +1774,7 @@ def to_sampling_params(
18131774 self ,
18141775 default_max_tokens : int ,
18151776 default_sampling_params : Optional [dict ] = None ) -> SamplingParams :
1816- # TODO(#9845): remove max_tokens when field is removed from OpenAI API
1777+
18171778 max_tokens = default_max_tokens
18181779
18191780 if default_sampling_params is None :
@@ -2029,7 +1990,7 @@ def to_sampling_params(
20291990 self ,
20301991 default_max_tokens : int ,
20311992 default_sampling_params : Optional [dict ] = None ) -> SamplingParams :
2032- # TODO(#9845): remove max_tokens when field is removed from OpenAI API
1993+
20331994 max_tokens = default_max_tokens
20341995
20351996 if default_sampling_params is None :
0 commit comments