huggingface · gante · Mar 15, 2024 · Mar 12, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -456,10 +456,7 @@ def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
                 processed["stride"] = stride
             yield {"is_last": True, **processed, **extra}
 
-    def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
-        if generate_kwargs is None:
-            generate_kwargs = {}
-
+    def _forward(self, model_inputs, return_timestamps=False, **generate_kwargs):
         attention_mask = model_inputs.pop("attention_mask", None)
         stride = model_inputs.pop("stride", None)
         is_last = model_inputs.pop("is_last")

diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
@@ -1091,6 +1091,16 @@ def get_inference_context(self):
         return torch.no_grad
 
     def forward(self, model_inputs, **forward_params):
+        # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
+        # forward params.
+        if (
+            self.tokenizer is not None
+            and self.model.can_generate()
+            and self.tokenizer.pad_token_id is not None
+            and self.model.generation_config.pad_token_id is None
+        ):
+            forward_params["pad_token_id"] = self.tokenizer.pad_token_id
+
         with self.device_placement():
             if self.framework == "tf":
                 model_inputs["training"] = False

diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py
@@ -196,9 +196,7 @@ def new_user_input(self):
     build_pipeline_init_args(has_tokenizer=True),
     r"""
         min_length_for_response (`int`, *optional*, defaults to 32):
-            The minimum length (in number of tokens) for a response.
-        minimum_tokens (`int`, *optional*, defaults to 10):
-            The minimum length of tokens to leave for a response.""",
+            The minimum length (in number of tokens) for a response.""",
 )
 class ConversationalPipeline(Pipeline):
     """
@@ -241,17 +239,13 @@ def __init__(self, *args, **kwargs):
         if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
 
-    def _sanitize_parameters(
-        self, min_length_for_response=None, minimum_tokens=None, clean_up_tokenization_spaces=None, **generate_kwargs
-    ):
+    def _sanitize_parameters(self, min_length_for_response=None, clean_up_tokenization_spaces=None, **generate_kwargs):
         preprocess_params = {}
         forward_params = {}
         postprocess_params = {}
 
         if min_length_for_response is not None:
             preprocess_params["min_length_for_response"] = min_length_for_response
-        if minimum_tokens is not None:
-            forward_params["minimum_tokens"] = minimum_tokens
 
         if "max_length" in generate_kwargs:
             forward_params["max_length"] = generate_kwargs["max_length"]
@@ -304,7 +298,7 @@ def preprocess(self, conversation: Conversation, min_length_for_response=32) ->
             input_ids = tf.constant([input_ids])
         return {"input_ids": input_ids, "conversation": conversation}
 
-    def _forward(self, model_inputs, minimum_tokens=10, **generate_kwargs):
+    def _forward(self, model_inputs, **generate_kwargs):
         n = model_inputs["input_ids"].shape[1]
         conversation = model_inputs.pop("conversation")
         if "max_length" not in generate_kwargs and "max_new_tokens" not in generate_kwargs:

diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
@@ -419,14 +419,14 @@ def preprocess(
                     "is_last": span_idx == num_spans - 1,
                 }
 
-    def _forward(self, model_inputs):
+    def _forward(self, model_inputs, **generate_kwargs):
         p_mask = model_inputs.pop("p_mask", None)
         word_ids = model_inputs.pop("word_ids", None)
         words = model_inputs.pop("words", None)
         is_last = model_inputs.pop("is_last", False)
 
         if self.model_type == ModelType.VisionEncoderDecoder:
-            model_outputs = self.model.generate(**model_inputs)
+            model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)
 

diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
@@ -164,7 +164,7 @@ def preprocess(self, image, prompt=None, timeout=None):
 
         return model_inputs
 
-    def _forward(self, model_inputs, generate_kwargs=None):
+    def _forward(self, model_inputs, **generate_kwargs):
         # Git model sets `model_inputs["input_ids"] = None` in `preprocess` (when `prompt=None`). In batch model, the
         # pipeline will group them into a list of `None`, which fail `_forward`. Avoid this by checking it first.
         if (
@@ -174,8 +174,6 @@ def _forward(self, model_inputs, generate_kwargs=None):
         ):
             model_inputs["input_ids"] = None
 
-        if generate_kwargs is None:
-            generate_kwargs = {}
         # FIXME: We need to pop here due to a difference in how `generation.py` and `generation.tf_utils.py`
         #  parse inputs. In the Tensorflow version, `generate` raises an error if we don't use `input_ids` whereas
         #  the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`

diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py
@@ -377,7 +377,7 @@ def preprocess(self, pipeline_input, sequential=None, padding=True, truncation=N
         inputs["table"] = table
         return inputs
 
-    def _forward(self, model_inputs, sequential=False):
+    def _forward(self, model_inputs, sequential=False, **generate_kwargs):
         table = model_inputs.pop("table")
 
         if self.type == "tapas":
@@ -386,7 +386,7 @@ def _forward(self, model_inputs, sequential=False):
             else:
                 outputs = self.batch_inference(**model_inputs)
         else:
-            outputs = self.model.generate(**model_inputs)
+            outputs = self.model.generate(**model_inputs, **generate_kwargs)
         model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
         return model_outputs