diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index ee976e9ece0a6c..f2d0f136790922 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -311,14 +311,14 @@ def _sanitize_parameters(

         forward_params = defaultdict(dict)
         if max_new_tokens is not None:
-            forward_params["generate_kwargs"]["max_new_tokens"] = max_new_tokens
+            forward_params["max_new_tokens"] = max_new_tokens
         if generate_kwargs is not None:
             if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                 raise ValueError(
                     "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
                     " only 1 version"
                 )
-            forward_params["generate_kwargs"].update(generate_kwargs)
+            forward_params.update(generate_kwargs)

         postprocess_params = {}
         if decoder_kwargs is not None:
@@ -456,10 +456,7 @@ def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
                 processed["stride"] = stride
             yield {"is_last": True, **processed, **extra}

-    def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
-        if generate_kwargs is None:
-            generate_kwargs = {}
-
+    def _forward(self, model_inputs, return_timestamps=False, **generate_kwargs):
         attention_mask = model_inputs.pop("attention_mask", None)
         stride = model_inputs.pop("stride", None)
         is_last = model_inputs.pop("is_last")
diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 758484107b76f2..b07db7ea64c62f 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -885,6 +885,16 @@ def __init__(
         self._num_workers = kwargs.pop("num_workers", None)
         self._preprocess_params, self._forward_params, self._postprocess_params = self._sanitize_parameters(**kwargs)

+        # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
+        # forward params so that `generate` is aware of the pad token.
+        if (
+            self.tokenizer is not None
+            and self.model.can_generate()
+            and self.tokenizer.pad_token_id is not None
+            and self.model.generation_config.pad_token_id is None
+        ):
+            self._forward_params["pad_token_id"] = self.tokenizer.pad_token_id
+
         if self.image_processor is None and self.feature_extractor is not None:
             if isinstance(self.feature_extractor, BaseImageProcessor):
                 # Backward compatible change, if users called
diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py
index 65afd6d40e0e4f..257f693c9d2ea3 100644
--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -196,9 +196,7 @@ def new_user_input(self):
     build_pipeline_init_args(has_tokenizer=True),
     r"""
         min_length_for_response (`int`, *optional*, defaults to 32):
-            The minimum length (in number of tokens) for a response.
-        minimum_tokens (`int`, *optional*, defaults to 10):
-            The minimum length of tokens to leave for a response.""",
+            The minimum length (in number of tokens) for a response.""",
 )
 class ConversationalPipeline(Pipeline):
     """
@@ -241,17 +239,13 @@ def __init__(self, *args, **kwargs):
         if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token

-    def _sanitize_parameters(
-        self, min_length_for_response=None, minimum_tokens=None, clean_up_tokenization_spaces=None, **generate_kwargs
-    ):
+    def _sanitize_parameters(self, min_length_for_response=None, clean_up_tokenization_spaces=None, **generate_kwargs):
         preprocess_params = {}
         forward_params = {}
         postprocess_params = {}

         if min_length_for_response is not None:
             preprocess_params["min_length_for_response"] = min_length_for_response
-        if minimum_tokens is not None:
-            forward_params["minimum_tokens"] = minimum_tokens

         if "max_length" in generate_kwargs:
             forward_params["max_length"] = generate_kwargs["max_length"]
@@ -304,7 +298,7 @@ def preprocess(self, conversation: Conversation, min_length_for_response=32) ->
             input_ids = tf.constant([input_ids])
         return {"input_ids": input_ids, "conversation": conversation}

-    def _forward(self, model_inputs, minimum_tokens=10, **generate_kwargs):
+    def _forward(self, model_inputs, **generate_kwargs):
         n = model_inputs["input_ids"].shape[1]
         conversation = model_inputs.pop("conversation")
         if "max_length" not in generate_kwargs and "max_new_tokens" not in generate_kwargs:
diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
index ab73aca2c19039..64714390b04f1d 100644
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -419,14 +419,14 @@ def preprocess(
                 "is_last": span_idx == num_spans - 1,
             }

-    def _forward(self, model_inputs):
+    def _forward(self, model_inputs, **generate_kwargs):
         p_mask = model_inputs.pop("p_mask", None)
         word_ids = model_inputs.pop("word_ids", None)
         words = model_inputs.pop("words", None)
         is_last = model_inputs.pop("is_last", False)

         if self.model_type == ModelType.VisionEncoderDecoder:
-            model_outputs = self.model.generate(**model_inputs)
+            model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)

diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index 26698ecf0cebc0..4a9a3744d841a0 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -74,7 +74,7 @@ def __init__(self, *args, **kwargs):
         )

     def _sanitize_parameters(self, max_new_tokens=None, generate_kwargs=None, prompt=None, timeout=None):
-        forward_kwargs = {}
+        forward_params = {}
         preprocess_params = {}

         if prompt is not None:
@@ -82,18 +82,17 @@ def _sanitize_parameters(self, max_new_tokens=None, generate_kwargs=None, prompt
         if timeout is not None:
             preprocess_params["timeout"] = timeout

-        if generate_kwargs is not None:
-            forward_kwargs["generate_kwargs"] = generate_kwargs
         if max_new_tokens is not None:
-            if "generate_kwargs" not in forward_kwargs:
-                forward_kwargs["generate_kwargs"] = {}
-            if "max_new_tokens" in forward_kwargs["generate_kwargs"]:
+            forward_params["max_new_tokens"] = max_new_tokens
+        if generate_kwargs is not None:
+            if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                 raise ValueError(
-                    "'max_new_tokens' is defined twice, once in 'generate_kwargs' and once as a direct parameter,"
-                    " please use only one"
+                    "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
+                    " only 1 version"
                 )
-            forward_kwargs["generate_kwargs"]["max_new_tokens"] = max_new_tokens
-        return preprocess_params, forward_kwargs, {}
+            forward_params.update(generate_kwargs)
+
+        return preprocess_params, forward_params, {}

     def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
         """
@@ -164,7 +163,7 @@ def preprocess(self, image, prompt=None, timeout=None):

         return model_inputs

-    def _forward(self, model_inputs, generate_kwargs=None):
+    def _forward(self, model_inputs, **generate_kwargs):
         # Git model sets `model_inputs["input_ids"] = None` in `preprocess` (when `prompt=None`). In batch model, the
         # pipeline will group them into a list of `None`, which fail `_forward`. Avoid this by checking it first.
         if (
@@ -174,8 +173,6 @@ def _forward(self, model_inputs, generate_kwargs=None):
         ):
             model_inputs["input_ids"] = None

-        if generate_kwargs is None:
-            generate_kwargs = {}
         # FIXME: We need to pop here due to a difference in how `generation.py` and `generation.tf_utils.py`
         # parse inputs. In the Tensorflow version, `generate` raises an error if we don't use `input_ids` whereas
         # the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`
diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py
index 8a6996a7187756..702a47b7c3cbed 100644
--- a/src/transformers/pipelines/table_question_answering.py
+++ b/src/transformers/pipelines/table_question_answering.py
@@ -376,7 +376,7 @@ def preprocess(self, pipeline_input, sequential=None, padding=True, truncation=N
         inputs["table"] = table
         return inputs

-    def _forward(self, model_inputs, sequential=False):
+    def _forward(self, model_inputs, sequential=False, **generate_kwargs):
         table = model_inputs.pop("table")

         if self.type == "tapas":
@@ -385,7 +385,7 @@ def _forward(self, model_inputs, sequential=False):
             else:
                 outputs = self.batch_inference(**model_inputs)
         else:
-            outputs = self.model.generate(**model_inputs)
+            outputs = self.model.generate(**model_inputs, **generate_kwargs)
         model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
         return model_outputs
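
For context, a minimal usage sketch of the calling convention this patch supports: generation arguments such as `max_new_tokens` can be passed to the pipeline flat, or nested under `generate_kwargs`, and `_sanitize_parameters` now merges both into the same flat `forward_params` that `_forward(**generate_kwargs)` passes on to `model.generate`. The checkpoint name and image URL below are placeholders for illustration only, not part of the patch.

from transformers import pipeline

# Placeholder checkpoint and image URL, used only for illustration.
captioner = pipeline("image-to-text", model="ydshieh/vit-gpt2-coco-en")
image = "https://example.com/cat.png"

# Flat form: `max_new_tokens` lands directly in `forward_params` and is
# forwarded to `model.generate(...)` through `_forward(**generate_kwargs)`.
print(captioner(image, max_new_tokens=20))

# Nested form: the `generate_kwargs` dict is merged into the same flat params.
print(captioner(image, generate_kwargs={"num_beams": 4}))

# Supplying `max_new_tokens` both ways triggers the ValueError raised in
# `_sanitize_parameters` above.
# captioner(image, max_new_tokens=20, generate_kwargs={"max_new_tokens": 20})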