diff --git a/src/transformers/generation_flax_utils.py b/src/transformers/generation_flax_utils.py
index 2f80c7fcf27e..cd3e4bf7ba09 100644
--- a/src/transformers/generation_flax_utils.py
+++ b/src/transformers/generation_flax_utils.py
@@ -208,7 +208,6 @@ def generate(
             post](https://huggingface.co/blog/how-to-generate).

         Parameters:
-
            input_ids (`jnp.ndarray` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            max_length (`int`, *optional*, defaults to `model.config.max_length`):
@@ -217,23 +216,38 @@ def generate(
                the prompt.
            max_new_tokens (`int`, *optional*):
                The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
-           do_sample (`bool`, *optional*, defaults to `False`):
-               Whether or not to use sampling ; use greedy decoding otherwise.
-           temperature (`float`, *optional*, defaults to 1.0):
-               The value used to module the next token probabilities.
-           top_k (`int`, *optional*, defaults to 50):
+           min_length (`int`, *optional*, defaults to `model.config.min_length` or 10 if the config does not set any
+               value): The minimum length of the sequence to be generated
+           do_sample (`bool`, *optional*, defaults to `model.config.do_sample` or `False` if the config does not set
+               any value): Whether or not to use sampling ; use greedy decoding otherwise.
+           temperature (`float`, *optional*, defaults to `model.config.temperature` or 1.0 if the config does not set
+               any value): The value used to module the next token probabilities.
+           top_k (`int`, *optional*, defaults to `model.config.top_k` or 50 if the config does not set any value):
                The number of highest probability vocabulary tokens to keep for top-k-filtering.
-           top_p (`float`, *optional*, defaults to 1.0):
+           top_p (`float`, *optional*, defaults to `model.config.top_p` or 1.0 if the config does not set any value):
                If set to float < 1, only the most probable tokens with probabilities that add up to `top_p` or higher
                are kept for generation.
-           pad_token_id (`int`, *optional*):
+           pad_token_id (`int`, *optional*, defaults to `model.config.pad_token_id`):
                The id of the *padding* token.
-           bos_token_id (`int`, *optional*):
+           bos_token_id (`int`, *optional*, defaults to `model.config.bos_token_id`):
                The id of the *beginning-of-sequence* token.
-           eos_token_id (`int`, *optional*):
+           eos_token_id (`int`, *optional*, defaults to `model.config.eos_token_id`):
                The id of the *end-of-sequence* token.
-           num_beams (`int`, *optional*, defaults to 1):
-               Number of beams for beam search. 1 means no beam search.
+           length_penalty (`float`, *optional*, defaults to `model.config.length_penalty` or 1.0 if the config does
+               not set any value):
+               Exponential penalty to the length. 1.0 means that the beam score is penalized by the sequence length.
+               0.0 means no penalty. Set to values < 0.0 in order to encourage the model to generate longer
+               sequences, to a value > 0.0 in order to encourage the model to produce shorter sequences.
+           no_repeat_ngram_size (`int`, *optional*, defaults to `model.config.no_repeat_ngram_size` or 0 if the config
+               does not set any value): If set to int > 0, all ngrams of that size can only occur once.
+           num_beams (`int`, *optional*, defaults to `model.config.num_beams` or 1 if the config does not set any
+               value): Number of beams for beam search. 1 means no beam search.
+           forced_bos_token_id (`int`, *optional*, defaults to `model.config.forced_bos_token_id`):
+               The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful
+               for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be
+               the target language token.
+           forced_eos_token_id (`int`, *optional*, defaults to `model.config.forced_eos_token_id`):
+               The id of the token to force as the last generated token when `max_length` is reached.
            decoder_start_token_id (`int`, *optional*):
                If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.
            trace (`bool`, *optional*, defaults to `True`):
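Not part of the patch: the Flax docstring entries above all describe the same fallback rule, namely that an argument left unset falls back to the matching attribute on `model.config`, and only then to a hard-coded default. A minimal sketch of that resolution pattern follows; the helper name `_resolve_generation_defaults` is hypothetical and used purely for illustration, not the library's actual code.

    # Illustration only; mirrors the wording of the docstring entries above.
    def _resolve_generation_defaults(config, max_length=None, top_k=None, do_sample=None):
        # "defaults to `model.config.max_length`"
        max_length = max_length if max_length is not None else config.max_length
        # "defaults to `model.config.top_k` or 50 if the config does not set any value"
        top_k = top_k if top_k is not None else (config.top_k if config.top_k is not None else 50)
        # "defaults to `model.config.do_sample` or `False` if the config does not set any value"
        do_sample = do_sample if do_sample is not None else (config.do_sample if config.do_sample is not None else False)
        return max_length, top_k, do_sample
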
diff --git a/src/transformers/generation_tf_utils.py b/src/transformers/generation_tf_utils.py
index ec9a61e90099..343b9c76659a 100644
--- a/src/transformers/generation_tf_utils.py
+++ b/src/transformers/generation_tf_utils.py
@@ -418,9 +418,7 @@ def generate(
             post](https://huggingface.co/blog/how-to-generate).

         Parameters:
-
-           input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`, `(batch_size, sequence_length,
-           feature_dim)` or `(batch_size, num_channels, height, width)`, *optional*):
+           input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`, `(batch_size, sequence_length, feature_dim)` or `(batch_size, num_channels, height, width)`, *optional*):
                The sequence used as a prompt for the generation or as model inputs to the encoder. If `None` the
                method initializes it with `bos_token_id` and a batch size of 1. For decoder-only models `inputs`
                should of in the format of `input_ids`. For encoder-decoder models *inputs* can represent any of
@@ -431,42 +429,43 @@ def generate(
                the prompt.
            max_new_tokens (`int`, *optional*):
                The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
-           min_length (`int`, *optional*, defaults to 10):
+           min_length (`int`, *optional*, defaults to `model.config.min_length` or 10 if the config does not set any value):
                The minimum length of the sequence to be generated.
-           do_sample (`bool`, *optional*, defaults to `False`):
+           do_sample (`bool`, *optional*, defaults to `model.config.do_sample` or `False` if the config does not set any value):
                Whether or not to use sampling ; use greedy decoding otherwise.
            early_stopping (`bool`, *optional*, defaults to `False`):
                Whether to stop the beam search when at least `num_beams` sentences are finished per batch or not.
-           num_beams (`int`, *optional*, defaults to 1):
-               Number of beams for beam search. 1 means no beam search.
-           temperature (`float`, *optional*, defaults to 1.0):
-               The value used to module the next token probabilities.
-           top_k (`int`, *optional*, defaults to 50):
+           num_beams (`int`, *optional*, defaults to `model.config.num_beams` or 1 if the config does not set any
+               value): Number of beams for beam search. 1 means no beam search.
+           temperature (`float`, *optional*, defaults to `model.config.temperature` or 1.0 if the config does not set
+               any value): The value used to module the next token probabilities.
+           top_k (`int`, *optional*, defaults to `model.config.top_k` or 50 if the config does not set any value):
                The number of highest probability vocabulary tokens to keep for top-k-filtering.
-           top_p (`float`, *optional*, defaults to 1.0):
+           top_p (`float`, *optional*, defaults to `model.config.top_p` or 1.0 if the config does not set any value):
                If set to float < 1, only the most probable tokens with probabilities that add up to `top_p` or higher
                are kept for generation.
-           repetition_penalty (`float`, *optional*, defaults to 1.0):
+           repetition_penalty (`float`, *optional*, defaults to `model.config.repetition_penalty` or 1.0 if the config does not set any value):
                The parameter for repetition penalty. 1.0 means no penalty. See [this
                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
-           pad_token_id (`int`, *optional*):
+           pad_token_id (`int`, *optional*, defaults to `model.config.pad_token_id`):
                The id of the *padding* token.
-           bos_token_id (`int`, *optional*):
+           bos_token_id (`int`, *optional*, defaults to `model.config.bos_token_id`):
                The id of the *beginning-of-sequence* token.
-           eos_token_id (`int`, *optional*):
+           eos_token_id (`int`, *optional*, defaults to `model.config.eos_token_id`):
                The id of the *end-of-sequence* token.
-           length_penalty (`float`, *optional*, defaults to 1.0):
+           length_penalty (`float`, *optional*, defaults to `model.config.length_penalty` or 1.0 if the config does not set any value):
                Exponential penalty to the length. 1.0 means no penalty. Set to values < 1.0 in order to encourage the
                model to generate shorter sequences, to a value > 1.0 in order to encourage the model to produce longer
                sequences.
-           no_repeat_ngram_size (`int`, *optional*, defaults to 0):
+           no_repeat_ngram_size (`int`, *optional*, defaults to `model.config.no_repeat_ngram_size` or 0 if the config does not set any value):
                If set to int > 0, all ngrams of that size can only occur once.
-           bad_words_ids(`List[int]`, *optional*):
+           bad_words_ids(`List[int]`, *optional*, defaults to `model.config.bad_words_ids`):
                List of token ids that are not allowed to be generated. In order to get the tokens of the words that
                should not appear in the generated text, use `tokenizer.encode(bad_word, add_prefix_space=True)`.
-           num_return_sequences(`int`, *optional*, defaults to 1):
-               The number of independently computed returned sequences for each element in the batch.
+           num_return_sequences(`int`, *optional*, defaults to `model.config.num_return_sequences` or 1 if the config does not set any value):
+               The number of independently computed returned sequences for each element in
+               the batch.
            attention_mask (`tf.Tensor` of `dtype=tf.int32` and shape `(batch_size, sequence_length)`, *optional*):
                Mask to avoid performing attention on padding token indices. Mask values are in `[0, 1]`, 1 for tokens
                that are not masked, and 0 for masked tokens.
@@ -479,21 +478,23 @@ def generate(
            use_cache: (`bool`, *optional*, defaults to `True`):
                Whether or not the model should use the past last key/values attentions (if applicable to the model) to
                speed up decoding.
-           output_attentions (`bool`, *optional*, defaults to `False`):
-               Whether or not to return the attentions tensors of all attention layers. See `attentions` under
-               returned tensors for more details.
-           output_hidden_states (`bool`, *optional*, defaults to `False`):
-               Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+           output_attentions (`bool`, *optional*, defaults to `model.config.output_attentions` or `False` if the config does not set any value):
+               Whether or not to return the attentions tensors of all attention
+               layers. See `attentions` under returned tensors for more details.
+           output_hidden_states (`bool`, *optional*, defaults to `model.config.output_hidden_states` or `False` if the config does not set any value):
+               Whether or not to return the hidden states of all layers. See
+               `hidden_states` under returned tensors for more details.
+           output_scores (`bool`, *optional*, defaults to `model.config.output_scores` or `False` if the config does not set any value):
+               Whether or not to return the prediction scores. See `scores` under returned tensors
                for more details.
-           output_scores (`bool`, *optional*, defaults to `False`):
-               Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
-           return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-               Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
-           forced_bos_token_id (`int`, *optional*):
+           return_dict_in_generate (`bool`, *optional*, defaults to `model.config.return_dict_in_generate` or `False` if the config does not set any value):
+               Whether or not to return a [`~utils.ModelOutput`] instead of a
+               plain tuple.
+           forced_bos_token_id (`int`, *optional*, defaults to `model.config.forced_bos_token_id`):
                The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful
                for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be
                the target language token.
-           forced_eos_token_id (`int`, *optional*):
+           forced_eos_token_id (`int`, *optional*, defaults to `model.config.forced_eos_token_id`):
                The id of the token to force as the last generated token when `max_length` is reached.
            model_specific_kwargs:
                Additional model specific kwargs will be forwarded to the `forward` function of the model.
@@ -791,7 +792,6 @@ def generate(
            attention_mask = tf.gather(attention_mask, expanded_batch_idxs, axis=0)

        if self.config.is_encoder_decoder:
-
            # create empty decoder_input_ids
            input_ids = (
                tf.ones(
@@ -1071,7 +1071,6 @@ def _generate_beam_search(

            # for each sentence
            for batch_idx in range(batch_size):
-
                # if we are done with this sentence
                if done[batch_idx]:
                    assert (
@@ -1336,7 +1335,6 @@ def _generate(
             post](https://huggingface.co/blog/how-to-generate).

         Parameters:
-
            input_ids (`tf.Tensor` of `dtype=tf.int32` and shape `(batch_size, sequence_length)`, *optional*):
                The sequence used as a prompt for the generation. If `None` the method initializes it with
                `bos_token_id` and a batch size of 1.
@@ -1749,7 +1747,6 @@ def _prepare_decoder_input_ids_for_generation(
        bos_token_id: int = None,
        model_kwargs: Optional[Dict[str, tf.Tensor]] = None,
    ) -> tf.Tensor:
-
        # prepare `input_ids` for decoder if model is encoder-decoder
        if model_kwargs is not None and "decoder_input_ids" in model_kwargs:
            return model_kwargs.pop("decoder_input_ids")
@@ -2069,7 +2066,6 @@ def greedy_search(
        Generates sequences for models with a language modeling head using greedy decoding.

        Parameters:
-
            input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            logits_processor (`TFLogitsProcessorList`, *optional*):
@@ -2322,7 +2318,6 @@ def sample(
        Generates sequences for models with a language modeling head using multinomial sampling.

        Parameters:
-
            input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            logits_processor (`TFLogitsProcessorList`, *optional*):
@@ -2599,7 +2594,6 @@ def beam_search(
        Generates sequences for models with a language modeling head using beam search with multinomial sampling.

        Parameters:
-
            input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            max_length (`int`, *optional*, defaults to 20):
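Not part of the patch: a short usage sketch of the TensorFlow `generate()` call documented above. It assumes the public `gpt2` checkpoint is available; arguments passed explicitly override the `model.config` fallbacks described in the docstring, and everything left unset keeps the config values.

    from transformers import AutoTokenizer, TFAutoModelForCausalLM

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = TFAutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tokenizer("The new defaults are", return_tensors="tf")
    # do_sample, top_k and max_new_tokens are set explicitly; max_length, top_p, etc.
    # fall back to model.config as the docstring describes.
    output_ids = model.generate(inputs.input_ids, do_sample=True, top_k=20, max_new_tokens=30)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
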
diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py
index bb9330de37f0..df10b125ce56 100644
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -534,7 +534,6 @@ def _prepare_decoder_input_ids_for_generation(
        model_kwargs: Optional[Dict[str, torch.Tensor]] = None,
        device: torch.device = None,
    ) -> torch.LongTensor:
-
        if model_kwargs is not None and "decoder_input_ids" in model_kwargs:
            return model_kwargs.pop("decoder_input_ids")
        else:
@@ -928,26 +927,27 @@ def generate(
                the prompt.
            max_new_tokens (`int`, *optional*):
                The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
-           min_length (`int`, *optional*, defaults to `model.config.min_length` or 10 if the config does not set any value):
-               The minimum length of the sequence to be generated.
-           do_sample (`bool`, *optional*, defaults to `model.config.do_sample` or `False` if the config does not set any value):
-               Whether or not to use sampling ; use greedy decoding otherwise.
+           min_length (`int`, *optional*, defaults to `model.config.min_length` or 10 if the config does not set any
+               value): The minimum length of the sequence to be generated.
+           do_sample (`bool`, *optional*, defaults to `model.config.do_sample` or `False` if the config does not set
+               any value): Whether or not to use sampling ; use greedy decoding otherwise.
            early_stopping (`bool`, *optional*, defaults to `False`):
                Whether to stop the beam search when at least `num_beams` sentences are finished per batch or not.
-           num_beams (`int`, *optional*, defaults to `model.config.num_beams` or 1 if the config does not set any value):
-               Number of beams for beam search. 1 means no beam search.
-           temperature (`float`, *optional*, defaults to `model.config.temperature` or 1.0 if the config does not set any value):
-               The value used to module the next token probabilities.
+           num_beams (`int`, *optional*, defaults to `model.config.num_beams` or 1 if the config does not set any
+               value): Number of beams for beam search. 1 means no beam search.
+           temperature (`float`, *optional*, defaults to `model.config.temperature` or 1.0 if the config does not set
+               any value): The value used to module the next token probabilities.
            top_k (`int`, *optional*, defaults to `model.config.top_k` or 50 if the config does not set any value):
                The number of highest probability vocabulary tokens to keep for top-k-filtering.
            top_p (`float`, *optional*, defaults to `model.config.top_p` or 1.0 if the config does not set any value):
                If set to float < 1, only the most probable tokens with probabilities that add up to `top_p` or higher
                are kept for generation.
-           typical_p (`float`, *optional*, defaults to `model.config.typical_p` or 1.0 if the config does not set any value):
-               The amount of probability mass from the original distribution to be considered in typical decoding. If
-               set to 1.0 it takes no effect. See [this paper](https://arxiv.org/pdf/2202.00666.pdf) for more details.
-           repetition_penalty (`float`, *optional*, defaults to `model.config.repetition_penalty` or 1.0 if the config does not set any value):
-               The parameter for repetition penalty. 1.0 means no penalty. See [this
+           typical_p (`float`, *optional*, defaults to `model.config.typical_p` or 1.0 if the config does not set any
+               value): The amount of probability mass from the original distribution to be considered in typical
+               decoding. If set to 1.0 it takes no effect. See [this paper](https://arxiv.org/pdf/2202.00666.pdf) for
+               more details.
+           repetition_penalty (`float`, *optional*, defaults to `model.config.repetition_penalty` or 1.0 if the config
+               does not set any value): The parameter for repetition penalty. 1.0 means no penalty. See [this
                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
            pad_token_id (`int`, *optional*, defaults to `model.config.pad_token_id`):
                The id of the *padding* token.
@@ -955,15 +955,16 @@ def generate(
                The id of the *beginning-of-sequence* token.
            eos_token_id (`int`, *optional*, defaults to `model.config.eos_token_id`):
                The id of the *end-of-sequence* token.
-           length_penalty (`float`, *optional*, defaults to `model.config.length_penalty` or 1.0 if the config does not set any value):
+           length_penalty (`float`, *optional*, defaults to `model.config.length_penalty` or 1.0 if the config does
+               not set any value):
                Exponential penalty to the length. 1.0 means that the beam score is penalized by the sequence length.
                0.0 means no penalty. Set to values < 0.0 in order to encourage the model to generate longer
                sequences, to a value > 0.0 in order to encourage the model to produce shorter sequences.
-           no_repeat_ngram_size (`int`, *optional*, defaults to `model.config.no_repeat_ngram_size` or 0 if the config does not set any value):
-               If set to int > 0, all ngrams of that size can only occur once.
-           encoder_no_repeat_ngram_size (`int`, *optional*, defaults to `model.config.encoder_no_repeat_ngram_size` or 0 if the config does not set any value):
-               If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
-               `decoder_input_ids`.
+           no_repeat_ngram_size (`int`, *optional*, defaults to `model.config.no_repeat_ngram_size` or 0 if the config
+               does not set any value): If set to int > 0, all ngrams of that size can only occur once.
+           encoder_no_repeat_ngram_size (`int`, *optional*, defaults to `model.config.encoder_no_repeat_ngram_size`
+               or 0 if the config does not set any value): If set to int > 0, all ngrams of that size that occur in
+               the `encoder_input_ids` cannot occur in the `decoder_input_ids`.
            bad_words_ids(`List[List[int]]`, *optional*, defaults to `model.config.bad_words_ids`):
                List of token ids that are not allowed to be generated. In order to get the token ids of the words that
                should not appear in the generated text, use `tokenizer(bad_words, add_prefix_space=True,
@@ -973,8 +974,9 @@ def generate(
                list of words that must be included, the opposite to `bad_words_ids`. If given `List[List[List[int]]]`,
                this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081),
                where one can allow different forms of each word.
-           num_return_sequences(`int`, *optional*, defaults to `model.config.num_return_sequences` or 1 if the config does not set any value):
-               The number of independently computed returned sequences for each element in the batch.
+           num_return_sequences(`int`, *optional*, defaults to `model.config.num_return_sequences` or 1 if the config
+               does not set any value): The number of independently computed returned sequences for each element in
+               the batch.
            max_time(`float`, *optional*):
                The maximum amount of time you allow the computation to run for in seconds. generation will still
                finish the current pass after allocated time has been passed.
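Not part of the patch: the `bad_words_ids` entry above tells callers to build the ids with the tokenizer. A minimal sketch of that, assuming a GPT-2 style tokenizer; `add_prefix_space` is set at load time here so the same call works for both the slow and the fast tokenizer:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2", add_prefix_space=True)
    bad_words = ["ugly", "horrible"]
    # One list of token ids per banned word, without special tokens, as the docstring describes.
    bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids
    # outputs = model.generate(input_ids, bad_words_ids=bad_words_ids)
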
@@ -987,13 +989,13 @@ def generate(
            use_cache: (`bool`, *optional*, defaults to `True`):
                Whether or not the model should use the past last key/values attentions (if applicable to the model) to
                speed up decoding.
-           num_beam_groups (`int`, *optional*, defaults to `model.config.num_beam_groups` or 1 if the config does not set any value):
-               Number of groups to divide `num_beams` into in order to ensure diversity among different groups of
-               beams. [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
-           diversity_penalty (`float`, *optional*, defaults to `model.config.diversity_penalty` or 0.0 if the config does not set any value):
-               This value is subtracted from a beam's score if it generates a token same as any beam from other group
-               at a particular time. Note that `diversity_penalty` is only effective if `group beam search` is
-               enabled.
+           num_beam_groups (`int`, *optional*, defaults to `model.config.num_beam_groups` or 1 if the config does not
+               set any value): Number of groups to divide `num_beams` into in order to ensure diversity among
+               different groups of beams. [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
+           diversity_penalty (`float`, *optional*, defaults to `model.config.diversity_penalty` or 0.0 if the config
+               does not set any value): This value is subtracted from a beam's score if it generates a token same as
+               any beam from other group at a particular time. Note that `diversity_penalty` is only effective if
+               `group beam search` is enabled.
            prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], List[int]]`, *optional*):
                If provided, this function constraints the beam search to allowed tokens only at each step. If not
                provided no constraint is applied. This function takes 2 arguments: the batch ID `batch_id` and
@@ -1016,16 +1018,18 @@ def generate(
            constraints (`List[Constraint]`, *optional*):
                Custom constraints that can be added to the generation to ensure that the output will contain the use
                of certain tokens as defined by `Constraint` objects, in the most sensible way possible.
-           output_attentions (`bool`, *optional*, defaults to `model.config.output_attentions` or `False` if the config does not set any value):
-               Whether or not to return the attentions tensors of all attention layers. See `attentions` under
-               returned tensors for more details.
-           output_hidden_states (`bool`, *optional*, defaults to `model.config.output_hidden_states` or `False` if the config does not set any value):
-               Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+           output_attentions (`bool`, *optional*, defaults to `model.config.output_attentions` or `False` if the
+               config does not set any value): Whether or not to return the attentions tensors of all attention
+               layers. See `attentions` under returned tensors for more details.
+           output_hidden_states (`bool`, *optional*, defaults to `model.config.output_hidden_states` or `False` if the
+               config does not set any value): Whether or not to return the hidden states of all layers. See
+               `hidden_states` under returned tensors for more details.
+           output_scores (`bool`, *optional*, defaults to `model.config.output_scores` or `False` if the config does
+               not set any value): Whether or not to return the prediction scores. See `scores` under returned tensors
                for more details.
-           output_scores (`bool`, *optional*, defaults to `model.config.output_scores` or `False` if the config does not set any value):
-               Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
-           return_dict_in_generate (`bool`, *optional*, defaults to `model.config.return_dict_in_generate` or `False` if the config does not set any value):
-               Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+           return_dict_in_generate (`bool`, *optional*, defaults to `model.config.return_dict_in_generate` or `False`
+               if the config does not set any value): Whether or not to return a [`~utils.ModelOutput`] instead of a
+               plain tuple.
            forced_bos_token_id (`int`, *optional*, defaults to `model.config.forced_bos_token_id`):
                The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful
                for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be
@@ -1037,10 +1041,11 @@ def generate(
                crash. Note that using `remove_invalid_values` can slow down generation.
            synced_gpus (`bool`, *optional*, defaults to `False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
-           exponential_decay_length_penalty (`tuple(int, float)`, *optional*, defaults to `model.config.exponential_decay_length_penalty`):
-               This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been
-               generated. The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates
-               where penalty starts and `decay_factor` represents the factor of exponential decay
+           exponential_decay_length_penalty (`tuple(int, float)`, *optional*,
+               defaults to `model.config.exponential_decay_length_penalty`): This Tuple adds an exponentially
+               increasing length penalty, after a certain amount of tokens have been generated. The tuple shall
+               consist of: `(start_index, decay_factor)` where `start_index` indicates where penalty starts and
+               `decay_factor` represents the factor of exponential decay
            model_kwargs:
                Additional model specific kwargs will be forwarded to the `forward` function of the model. If the model
@@ -1555,7 +1560,6 @@ def greedy_search(
        used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.

        Parameters:
-
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            logits_processor (`LogitsProcessorList`, *optional*):
@@ -1671,7 +1675,6 @@ def greedy_search(

        this_peer_finished = False  # used by synced_gpus only
        while True:
-
            if synced_gpus:
                # Under synced_gpus the `forward` call must continue until all gpus complete their sequence.
                # The following logic allows an early break if all peers finished generating their sequence
@@ -1789,7 +1792,6 @@ def sample(
        can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.

        Parameters:
-
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            logits_processor (`LogitsProcessorList`, *optional*):
@@ -1926,7 +1928,6 @@ def sample(
        this_peer_finished = False  # used by synced_gpus only
        # auto-regressive generation
        while True:
-
            if synced_gpus:
                # Under synced_gpus the `forward` call must continue until all gpus complete their sequence.
                # The following logic allows an early break if all peers finished generating their sequence
@@ -2046,7 +2047,6 @@ def beam_search(
        can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.

        Parameters:
-
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            beam_scorer (`BeamScorer`):
@@ -2195,7 +2195,6 @@ def beam_search(

        this_peer_finished = False  # used by synced_gpus only
        while True:
-
            if synced_gpus:
                # Under synced_gpus the `forward` call must continue until all gpus complete their sequence.
                # The following logic allows an early break if all peers finished generating their sequence
@@ -2355,7 +2354,6 @@ def beam_sample(
        sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.

        Parameters:
-
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            beam_scorer (`BeamScorer`):
@@ -2508,7 +2506,6 @@ def beam_sample(

        this_peer_finished = False  # used by synced_gpus only
        while True:
-
            if synced_gpus:
                # Under synced_gpus the `forward` call must continue until all gpus complete their sequence.
                # The following logic allows an early break if all peers finished generating their sequence
@@ -2672,7 +2669,6 @@ def group_beam_search(
        decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.

        Parameters:
-
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                The sequence used as a prompt for the generation.
            beam_scorer (`BeamScorer`):
@@ -2832,7 +2828,6 @@ def group_beam_search(

        this_peer_finished = False  # used by synced_gpus only
        while True:
-
            if synced_gpus:
                # Under synced_gpus the `forward` call must continue until all gpus complete their sequence.
                # The following logic allows an early break if all peers finished generating their sequence
@@ -3030,7 +3025,6 @@ def constrained_beam_search(
        synced_gpus: Optional[bool] = None,
        **model_kwargs,
    ) -> Union[BeamSearchOutput, torch.LongTensor]:
-
        r"""
        Generates sequences of token ids for models with a language modeling head using **constrained beam search
        decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
@@ -3192,7 +3186,6 @@ def constrained_beam_search(

        this_peer_finished = False  # used by synced_gpus only
        while True:
-
            if synced_gpus:
                # Under synced_gpus the `forward` call must continue until all gpus complete their sequence.
                # The following logic allows an early break if all peers finished generating their sequence
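Not part of the patch: a short PyTorch sketch exercising a few of the parameters whose config fallbacks are documented above (beam search with an explicit length penalty and an n-gram repetition guard). It assumes the public `t5-small` checkpoint; any argument left out keeps the `model.config` default described in the docstring.

    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    tokenizer = AutoTokenizer.from_pretrained("t5-small")
    model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

    text = "summarize: Beam search keeps the num_beams best partial sequences at every decoding step."
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    summary_ids = model.generate(
        input_ids,
        num_beams=4,              # overrides model.config.num_beams
        length_penalty=2.0,       # overrides model.config.length_penalty
        no_repeat_ngram_size=3,   # overrides model.config.no_repeat_ngram_size
        max_new_tokens=40,
    )
    print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))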