From 3f3ca17b92866a01eb0157473d09c91a61d7d675 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Tue, 12 Mar 2024 15:24:03 +0000
Subject: [PATCH 1/6] fix warning

---
 .../pipelines/automatic_speech_recognition.py |  4 +++
 src/transformers/pipelines/conversational.py  |  5 ++++
 .../pipelines/document_question_answering.py  |  8 ++++-
 src/transformers/pipelines/image_to_text.py   |  5 ++++
 .../pipelines/table_question_answering.py     |  8 ++++-
 .../pipelines/text2text_generation.py         |  5 ++++
 src/transformers/pipelines/text_generation.py |  4 +++
 src/transformers/pipelines/text_to_audio.py   |  4 +++
 .../pipelines/visual_question_answering.py    |  4 +++
 .../test_pipelines_text_generation.py         | 30 +++++++++++++++----
 10 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index ee976e9ece0a6c..21b948cd4e36a0 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -496,6 +496,10 @@ def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
             else:
                 generate_kwargs["encoder_outputs"] = encoder(inputs, attention_mask=attention_mask)

+            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
             tokens = self.model.generate(
                 attention_mask=attention_mask,
                 **generate_kwargs,
diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py
index 65afd6d40e0e4f..2dfa458dad3321 100644
--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -309,6 +309,11 @@ def _forward(self, model_inputs, minimum_tokens=10, **generate_kwargs):
         conversation = model_inputs.pop("conversation")
         if "max_length" not in generate_kwargs and "max_new_tokens" not in generate_kwargs:
             generate_kwargs["max_new_tokens"] = 256
+
+        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         if self.model.config.is_encoder_decoder:
             start_position = 1
diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
index ab73aca2c19039..6e4f4c97e5a673 100644
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -426,7 +426,13 @@ def _forward(self, model_inputs):
         is_last = model_inputs.pop("is_last", False)

         if self.model_type == ModelType.VisionEncoderDecoder:
-            model_outputs = self.model.generate(**model_inputs)
+            generate_kwargs = {}
+
+            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
+            model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)

diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index 26698ecf0cebc0..12038ea63a432f 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -181,6 +181,11 @@ def _forward(self, model_inputs, generate_kwargs=None):
         # the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`
         # in the `_prepare_model_inputs` method.
         inputs = model_inputs.pop(self.model.main_input_name)
+
+        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
         model_outputs = self.model.generate(inputs, **model_inputs, **generate_kwargs)

         return model_outputs
diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py
index f737ac7b3b408a..72cf442f968e52 100644
--- a/src/transformers/pipelines/table_question_answering.py
+++ b/src/transformers/pipelines/table_question_answering.py
@@ -386,7 +386,13 @@ def _forward(self, model_inputs, sequential=False):
             else:
                 outputs = self.batch_inference(**model_inputs)
         else:
-            outputs = self.model.generate(**model_inputs)
+            generate_kwargs = {}
+
+            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
+            outputs = self.model.generate(**model_inputs, **generate_kwargs)
         model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
         return model_outputs

diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py
index bb8abdfcf7f500..50b49c7c6ddaa7 100644
--- a/src/transformers/pipelines/text2text_generation.py
+++ b/src/transformers/pipelines/text2text_generation.py
@@ -188,6 +188,11 @@ def _forward(self, model_inputs, **generate_kwargs):
             generate_kwargs.get("min_length", self.model.config.min_length),
             generate_kwargs.get("max_length", self.model.config.max_length),
         )
+
+        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         out_b = output_ids.shape[0]
         if self.framework == "pt":
diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py
index 0b358291717ee0..48fd642afebb42 100644
--- a/src/transformers/pipelines/text_generation.py
+++ b/src/transformers/pipelines/text_generation.py
@@ -323,6 +323,10 @@ def _forward(self, model_inputs, **generate_kwargs):
             if not has_min_new_tokens and "min_length" in generate_kwargs:
                 generate_kwargs["min_length"] += prefix_length

+        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
         # BS x SL
         generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
         out_b = generated_sequence.shape[0]
diff --git a/src/transformers/pipelines/text_to_audio.py b/src/transformers/pipelines/text_to_audio.py
index 58c21cc1216869..c026db68019f04 100644
--- a/src/transformers/pipelines/text_to_audio.py
+++ b/src/transformers/pipelines/text_to_audio.py
@@ -140,6 +140,10 @@ def _forward(self, model_inputs, **kwargs):
             # generate_kwargs get priority over forward_params
             forward_params.update(generate_kwargs)

+            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+                forward_params["pad_token_id"] = self.tokenizer.pad_token_id
+
             output = self.model.generate(**model_inputs, **forward_params)
         else:
             if len(generate_kwargs):
diff --git a/src/transformers/pipelines/visual_question_answering.py b/src/transformers/pipelines/visual_question_answering.py
index 9106b19d33671a..b452e5644cfe43 100644
--- a/src/transformers/pipelines/visual_question_answering.py
+++ b/src/transformers/pipelines/visual_question_answering.py
@@ -125,6 +125,10 @@ def preprocess(self, inputs, padding=False, truncation=False, timeout=None):

     def _forward(self, model_inputs, **generate_kwargs):
         if self.model.can_generate():
+            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
+            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
+                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
+
             model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)
diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py
index ada04c7dbeda64..55d2ec67cfadcf 100644
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -17,7 +17,9 @@
 from transformers import (
     MODEL_FOR_CAUSAL_LM_MAPPING,
     TF_MODEL_FOR_CAUSAL_LM_MAPPING,
+    AutoTokenizer,
     TextGenerationPipeline,
+    is_torch_available,
     logging,
     pipeline,
 )
@@ -36,6 +38,12 @@
 from .test_pipelines_common import ANY


+if is_torch_available():
+    import torch
+
+    from transformers import AutoModelForCausalLM
+
+
 @is_pipeline_test
 @require_torch_or_tf
 class TextGenerationPipelineTests(unittest.TestCase):
@@ -379,8 +387,6 @@ def run_pipeline_test(self, text_generator, _):
     @require_accelerate
     @require_torch_gpu
     def test_small_model_pt_bloom_accelerate(self):
-        import torch
-
         # Classic `model_kwargs`
         pipe = pipeline(
             model="hf-internal-testing/tiny-random-bloom",
@@ -435,8 +441,6 @@ def test_small_model_pt_bloom_accelerate(self):
     @require_torch
     @require_torch_accelerator
     def test_small_model_fp16(self):
-        import torch
-
         pipe = pipeline(
             model="hf-internal-testing/tiny-random-bloom",
             device=torch_device,
@@ -448,8 +452,6 @@ def test_small_model_fp16(self):
     @require_accelerate
     @require_torch_accelerator
     def test_pipeline_accelerate_top_p(self):
-        import torch
-
         pipe = pipeline(
             model="hf-internal-testing/tiny-random-bloom", device_map=torch_device, torch_dtype=torch.float16
         )
@@ -477,3 +479,19 @@ def test_pipeline_length_setting_warning(self):
         with CaptureLogger(logger) as cl:
             _ = text_generator(prompt, max_length=10)
         self.assertNotIn(logger_msg, cl.out)
+
+    @require_torch
+    def test_pipeline_tokenizer_has_pad_but_model_doesnt(self):
+        # When the tokenizer pad_token_id is set but the model pad_token_id is not, we pass the pad_token_id to
+        # `generate`. This prevents a warning from being raised, which this test checks.
+        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+        model.config.pad_token_id = None
+        model.generation_config.pad_token_id = None
+        tokenizer.pad_token_id = tokenizer.eos_token_id
+
+        llm = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="pt")
+        with self.assertRaises(AssertionError) as exc:
+            with self.assertLogs("transformers", level="WARNING"):
+                llm("The capital of France ")
+        self.assertIn("no logs of level WARNING or higher triggered", str(exc.exception))

From 0acda97416b68be5101dcba67fc652e2d3377603 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Thu, 14 Mar 2024 12:01:17 +0000
Subject: [PATCH 2/6] set pad_token_id in the generalist forward; remove test

---
 .../pipelines/automatic_speech_recognition.py |  9 +-----
 src/transformers/pipelines/base.py            |  9 ++++++
 src/transformers/pipelines/conversational.py  |  6 +---
 .../pipelines/document_question_answering.py  |  8 +----
 src/transformers/pipelines/image_to_text.py   |  8 +----
 .../pipelines/table_question_answering.py     |  8 +----
 .../pipelines/text2text_generation.py         |  4 ---
 src/transformers/pipelines/text_generation.py |  4 ---
 src/transformers/pipelines/text_to_audio.py   |  4 ---
 .../pipelines/visual_question_answering.py    |  4 ---
 .../test_pipelines_text_generation.py         | 30 ++++---------------
 11 files changed, 20 insertions(+), 74 deletions(-)

diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index 21b948cd4e36a0..fe0216632f08bf 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -456,10 +456,7 @@ def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
                 processed["stride"] = stride
             yield {"is_last": True, **processed, **extra}

-    def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
-        if generate_kwargs is None:
-            generate_kwargs = {}
-
+    def _forward(self, model_inputs, return_timestamps=False, **generate_kwargs):
         attention_mask = model_inputs.pop("attention_mask", None)
         stride = model_inputs.pop("stride", None)
         is_last = model_inputs.pop("is_last")
@@ -496,10 +493,6 @@ def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
             else:
                 generate_kwargs["encoder_outputs"] = encoder(inputs, attention_mask=attention_mask)

-            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
             tokens = self.model.generate(
                 attention_mask=attention_mask,
                 **generate_kwargs,
diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 758484107b76f2..73ac1d4dca7305 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -1091,6 +1091,15 @@ def get_inference_context(self):
         return torch.no_grad

     def forward(self, model_inputs, **forward_params):
+        # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
+        # forward params.
+        if (
+            self.tokenizer is not None
+            and self.tokenizer.pad_token_id is not None
+            and self.model.generation_config.pad_token_id is None
+        ):
+            forward_params["pad_token_id"] = self.tokenizer.pad_token_id
+
         with self.device_placement():
             if self.framework == "tf":
                 model_inputs["training"] = False
diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py
index 2dfa458dad3321..040c4d17f4251e 100644
--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -304,16 +304,12 @@ def preprocess(self, conversation: Conversation, min_length_for_response=32) ->
             input_ids = tf.constant([input_ids])
         return {"input_ids": input_ids, "conversation": conversation}

-    def _forward(self, model_inputs, minimum_tokens=10, **generate_kwargs):
+    def _forward(self, model_inputs, **generate_kwargs):
         n = model_inputs["input_ids"].shape[1]
         conversation = model_inputs.pop("conversation")
         if "max_length" not in generate_kwargs and "max_new_tokens" not in generate_kwargs:
             generate_kwargs["max_new_tokens"] = 256

-        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         if self.model.config.is_encoder_decoder:
             start_position = 1
diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
index 6e4f4c97e5a673..64714390b04f1d 100644
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -419,19 +419,13 @@ def preprocess(
             "is_last": span_idx == num_spans - 1,
         }

-    def _forward(self, model_inputs):
+    def _forward(self, model_inputs, **generate_kwargs):
         p_mask = model_inputs.pop("p_mask", None)
         word_ids = model_inputs.pop("word_ids", None)
         words = model_inputs.pop("words", None)
         is_last = model_inputs.pop("is_last", False)

         if self.model_type == ModelType.VisionEncoderDecoder:
-            generate_kwargs = {}
-
-            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
             model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)
diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index 12038ea63a432f..764400f8c73f3b 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -164,7 +164,7 @@ def preprocess(self, image, prompt=None, timeout=None):

         return model_inputs

-    def _forward(self, model_inputs, generate_kwargs=None):
+    def _forward(self, model_inputs, **generate_kwargs):
         # Git model sets `model_inputs["input_ids"] = None` in `preprocess` (when `prompt=None`). In batch model, the
         # pipeline will group them into a list of `None`, which fail `_forward`. Avoid this by checking it first.
         if (
@@ -174,18 +174,12 @@ def _forward(self, model_inputs, generate_kwargs=None):
         ):
             model_inputs["input_ids"] = None

-        if generate_kwargs is None:
-            generate_kwargs = {}
         # FIXME: We need to pop here due to a difference in how `generation.py` and `generation.tf_utils.py`
         # parse inputs. In the Tensorflow version, `generate` raises an error if we don't use `input_ids` whereas
         # the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`
         # in the `_prepare_model_inputs` method.
         inputs = model_inputs.pop(self.model.main_input_name)

-        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
         model_outputs = self.model.generate(inputs, **model_inputs, **generate_kwargs)

         return model_outputs
diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py
index 72cf442f968e52..6a1e6b1c8776a5 100644
--- a/src/transformers/pipelines/table_question_answering.py
+++ b/src/transformers/pipelines/table_question_answering.py
@@ -377,7 +377,7 @@ def preprocess(self, pipeline_input, sequential=None, padding=True, truncation=N
             inputs["table"] = table
         return inputs

-    def _forward(self, model_inputs, sequential=False):
+    def _forward(self, model_inputs, sequential=False, **generate_kwargs):
         table = model_inputs.pop("table")

         if self.type == "tapas":
@@ -386,12 +386,6 @@ def _forward(self, model_inputs, sequential=False):
             else:
                 outputs = self.batch_inference(**model_inputs)
         else:
-            generate_kwargs = {}
-
-            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
             outputs = self.model.generate(**model_inputs, **generate_kwargs)
         model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
         return model_outputs
diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py
index 50b49c7c6ddaa7..8b59752c27dd02 100644
--- a/src/transformers/pipelines/text2text_generation.py
+++ b/src/transformers/pipelines/text2text_generation.py
@@ -189,10 +189,6 @@ def _forward(self, model_inputs, **generate_kwargs):
             generate_kwargs.get("max_length", self.model.config.max_length),
         )

-        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         out_b = output_ids.shape[0]
         if self.framework == "pt":
diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py
index 48fd642afebb42..0b358291717ee0 100644
--- a/src/transformers/pipelines/text_generation.py
+++ b/src/transformers/pipelines/text_generation.py
@@ -323,10 +323,6 @@ def _forward(self, model_inputs, **generate_kwargs):
             if not has_min_new_tokens and "min_length" in generate_kwargs:
                 generate_kwargs["min_length"] += prefix_length

-        # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-        if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-            generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
         # BS x SL
         generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
         out_b = generated_sequence.shape[0]
diff --git a/src/transformers/pipelines/text_to_audio.py b/src/transformers/pipelines/text_to_audio.py
index c026db68019f04..58c21cc1216869 100644
--- a/src/transformers/pipelines/text_to_audio.py
+++ b/src/transformers/pipelines/text_to_audio.py
@@ -140,10 +140,6 @@ def _forward(self, model_inputs, **kwargs):
             # generate_kwargs get priority over forward_params
             forward_params.update(generate_kwargs)

-            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-                forward_params["pad_token_id"] = self.tokenizer.pad_token_id
-
             output = self.model.generate(**model_inputs, **forward_params)
         else:
             if len(generate_kwargs):
diff --git a/src/transformers/pipelines/visual_question_answering.py b/src/transformers/pipelines/visual_question_answering.py
index b452e5644cfe43..9106b19d33671a 100644
--- a/src/transformers/pipelines/visual_question_answering.py
+++ b/src/transformers/pipelines/visual_question_answering.py
@@ -125,10 +125,6 @@ def preprocess(self, inputs, padding=False, truncation=False, timeout=None):

     def _forward(self, model_inputs, **generate_kwargs):
         if self.model.can_generate():
-            # If the tokenizer has a pad token but the model doesn't, we add it to the `generate` call
-            if self.tokenizer.pad_token_id is not None and self.model.generation_config.pad_token_id is None:
-                generate_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
-
             model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)
diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py
index 55d2ec67cfadcf..ada04c7dbeda64 100644
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -17,9 +17,7 @@
 from transformers import (
     MODEL_FOR_CAUSAL_LM_MAPPING,
     TF_MODEL_FOR_CAUSAL_LM_MAPPING,
-    AutoTokenizer,
     TextGenerationPipeline,
-    is_torch_available,
     logging,
     pipeline,
 )
@@ -38,12 +36,6 @@
 from .test_pipelines_common import ANY


-if is_torch_available():
-    import torch
-
-    from transformers import AutoModelForCausalLM
-
-
 @is_pipeline_test
 @require_torch_or_tf
 class TextGenerationPipelineTests(unittest.TestCase):
@@ -387,6 +379,8 @@ def run_pipeline_test(self, text_generator, _):
     @require_accelerate
     @require_torch_gpu
     def test_small_model_pt_bloom_accelerate(self):
+        import torch
+
         # Classic `model_kwargs`
         pipe = pipeline(
             model="hf-internal-testing/tiny-random-bloom",
@@ -441,6 +435,8 @@ def test_small_model_pt_bloom_accelerate(self):
     @require_torch
     @require_torch_accelerator
     def test_small_model_fp16(self):
+        import torch
+
         pipe = pipeline(
             model="hf-internal-testing/tiny-random-bloom",
             device=torch_device,
@@ -452,6 +448,8 @@ def test_small_model_fp16(self):
     @require_accelerate
     @require_torch_accelerator
     def test_pipeline_accelerate_top_p(self):
+        import torch
+
         pipe = pipeline(
             model="hf-internal-testing/tiny-random-bloom", device_map=torch_device, torch_dtype=torch.float16
         )
@@ -479,19 +477,3 @@ def test_pipeline_length_setting_warning(self):
         with CaptureLogger(logger) as cl:
             _ = text_generator(prompt, max_length=10)
         self.assertNotIn(logger_msg, cl.out)
-
-    @require_torch
-    def test_pipeline_tokenizer_has_pad_but_model_doesnt(self):
-        # When the tokenizer pad_token_id is set but the model pad_token_id is not, we pass the pad_token_id to
-        # `generate`. This prevents a warning from being raised, which this test checks.
-        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-        model.config.pad_token_id = None
-        model.generation_config.pad_token_id = None
-        tokenizer.pad_token_id = tokenizer.eos_token_id
-
-        llm = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="pt")
-        with self.assertRaises(AssertionError) as exc:
-            with self.assertLogs("transformers", level="WARNING"):
-                llm("The capital of France ")
-        self.assertIn("no logs of level WARNING or higher triggered", str(exc.exception))

From 658f3d05d2b792ab16f172c1ca0cb19e538731b2 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Thu, 14 Mar 2024 12:02:53 +0000
Subject: [PATCH 3/6] extra condition

---
 src/transformers/pipelines/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 73ac1d4dca7305..e3b5afa5af5f06 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -1095,6 +1095,7 @@ def forward(self, model_inputs, **forward_params):
         # forward params.
         if (
             self.tokenizer is not None
+            and self.model.can_generate()
             and self.tokenizer.pad_token_id is not None
             and self.model.generation_config.pad_token_id is None
         ):

From 30c3b2e29a82c39cee0caf5796fe9031ee367919 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Thu, 14 Mar 2024 12:08:01 +0000
Subject: [PATCH 4/6] remove unused arg; remove unwanted diff

---
 src/transformers/pipelines/conversational.py       | 11 ++---------
 src/transformers/pipelines/image_to_text.py        |  1 -
 src/transformers/pipelines/text2text_generation.py |  1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py
index 040c4d17f4251e..257f693c9d2ea3 100644
--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -196,9 +196,7 @@ def new_user_input(self):
     build_pipeline_init_args(has_tokenizer=True),
     r"""
         min_length_for_response (`int`, *optional*, defaults to 32):
-            The minimum length (in number of tokens) for a response.
-        minimum_tokens (`int`, *optional*, defaults to 10):
-            The minimum length of tokens to leave for a response.""",
+            The minimum length (in number of tokens) for a response.""",
 )
 class ConversationalPipeline(Pipeline):
     """
@@ -241,17 +239,13 @@ def __init__(self, *args, **kwargs):
         if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token

-    def _sanitize_parameters(
-        self, min_length_for_response=None, minimum_tokens=None, clean_up_tokenization_spaces=None, **generate_kwargs
-    ):
+    def _sanitize_parameters(self, min_length_for_response=None, clean_up_tokenization_spaces=None, **generate_kwargs):
         preprocess_params = {}
         forward_params = {}
         postprocess_params = {}

         if min_length_for_response is not None:
             preprocess_params["min_length_for_response"] = min_length_for_response
-        if minimum_tokens is not None:
-            forward_params["minimum_tokens"] = minimum_tokens

         if "max_length" in generate_kwargs:
             forward_params["max_length"] = generate_kwargs["max_length"]
@@ -309,7 +303,6 @@ def _forward(self, model_inputs, **generate_kwargs):
         conversation = model_inputs.pop("conversation")
         if "max_length" not in generate_kwargs and "max_new_tokens" not in generate_kwargs:
             generate_kwargs["max_new_tokens"] = 256
-
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         if self.model.config.is_encoder_decoder:
             start_position = 1
diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index 764400f8c73f3b..96705ab5e56ac5 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -179,7 +179,6 @@ def _forward(self, model_inputs, **generate_kwargs):
         # the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`
         # in the `_prepare_model_inputs` method.
         inputs = model_inputs.pop(self.model.main_input_name)
-
         model_outputs = self.model.generate(inputs, **model_inputs, **generate_kwargs)

         return model_outputs
diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py
index 8b59752c27dd02..bb8abdfcf7f500 100644
--- a/src/transformers/pipelines/text2text_generation.py
+++ b/src/transformers/pipelines/text2text_generation.py
@@ -188,7 +188,6 @@ def _forward(self, model_inputs, **generate_kwargs):
             generate_kwargs.get("min_length", self.model.config.min_length),
             generate_kwargs.get("max_length", self.model.config.max_length),
         )
-
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         out_b = output_ids.shape[0]
         if self.framework == "pt":

From 399da6d2b51186cb6bea2ebda3cef0c806c4298b Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Thu, 14 Mar 2024 12:14:52 +0000
Subject: [PATCH 5/6] move to init

---
 src/transformers/pipelines/base.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index e3b5afa5af5f06..079c5a8a8c61c6 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -885,6 +885,16 @@ def __init__(
         self._num_workers = kwargs.pop("num_workers", None)
         self._preprocess_params, self._forward_params, self._postprocess_params = self._sanitize_parameters(**kwargs)

+        # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
+        # forward params.
+        if (
+            self.tokenizer is not None
+            and self.model.can_generate()
+            and self.tokenizer.pad_token_id is not None
+            and self.model.generation_config.pad_token_id is None
+        ):
+            self._forward_params["pad_token_id"] = self.tokenizer.pad_token_id
+
         if self.image_processor is None and self.feature_extractor is not None:
             if isinstance(self.feature_extractor, BaseImageProcessor):
                 # Backward compatible change, if users called
@@ -1091,16 +1101,6 @@ def get_inference_context(self):
         return torch.no_grad

     def forward(self, model_inputs, **forward_params):
-        # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
-        # forward params.
-        if (
-            self.tokenizer is not None
-            and self.model.can_generate()
-            and self.tokenizer.pad_token_id is not None
-            and self.model.generation_config.pad_token_id is None
-        ):
-            forward_params["pad_token_id"] = self.tokenizer.pad_token_id
-
         with self.device_placement():
             if self.framework == "tf":
                 model_inputs["training"] = False

From cef3ae028a8c8295820d694c566317872e9172b0 Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Thu, 14 Mar 2024 12:36:45 +0000
Subject: [PATCH 6/6] standardize interface

---
 .../pipelines/automatic_speech_recognition.py |  4 ++--
 src/transformers/pipelines/base.py            |  2 +-
 src/transformers/pipelines/image_to_text.py   | 19 +++++++++----------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index fe0216632f08bf..f2d0f136790922 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -311,14 +311,14 @@ def _sanitize_parameters(

         forward_params = defaultdict(dict)
         if max_new_tokens is not None:
-            forward_params["generate_kwargs"]["max_new_tokens"] = max_new_tokens
+            forward_params["max_new_tokens"] = max_new_tokens
         if generate_kwargs is not None:
             if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                 raise ValueError(
                     "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
                     " only 1 version"
                 )
-            forward_params["generate_kwargs"].update(generate_kwargs)
+            forward_params.update(generate_kwargs)

         postprocess_params = {}
         if decoder_kwargs is not None:
diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 079c5a8a8c61c6..b07db7ea64c62f 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -886,7 +886,7 @@ def __init__(
         self._preprocess_params, self._forward_params, self._postprocess_params = self._sanitize_parameters(**kwargs)

         # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
-        # forward params.
+        # forward params so that `generate` is aware of the pad token.
         if (
             self.tokenizer is not None
             and self.model.can_generate()
             and self.tokenizer.pad_token_id is not None
             and self.model.generation_config.pad_token_id is None
diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index 96705ab5e56ac5..4a9a3744d841a0 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -74,7 +74,7 @@ def __init__(self, *args, **kwargs):
         )

     def _sanitize_parameters(self, max_new_tokens=None, generate_kwargs=None, prompt=None, timeout=None):
-        forward_kwargs = {}
+        forward_params = {}
         preprocess_params = {}

         if prompt is not None:
@@ -82,18 +82,17 @@ def _sanitize_parameters(self, max_new_tokens=None, generate_kwargs=None, prompt
         if timeout is not None:
             preprocess_params["timeout"] = timeout

-        if generate_kwargs is not None:
-            forward_kwargs["generate_kwargs"] = generate_kwargs
         if max_new_tokens is not None:
-            if "generate_kwargs" not in forward_kwargs:
-                forward_kwargs["generate_kwargs"] = {}
-            if "max_new_tokens" in forward_kwargs["generate_kwargs"]:
+            forward_params["max_new_tokens"] = max_new_tokens
+        if generate_kwargs is not None:
+            if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                 raise ValueError(
-                    "'max_new_tokens' is defined twice, once in 'generate_kwargs' and once as a direct parameter,"
-                    " please use only one"
+                    "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
+                    " only 1 version"
                 )
-            forward_kwargs["generate_kwargs"]["max_new_tokens"] = max_new_tokens
-        return preprocess_params, forward_kwargs, {}
+            forward_params.update(generate_kwargs)
+
+        return preprocess_params, forward_params, {}

     def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
         """
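
Net effect of the series, for reference: after patches 5 and 6, `Pipeline.__init__` copies the tokenizer's pad token into `self._forward_params` whenever the model can generate, the tokenizer defines a `pad_token_id`, and the model's generation config does not, so every downstream `generate` call receives it. A minimal usage sketch follows (illustrative only, not part of the patches; it assumes a PyTorch install and reuses the tiny test checkpoint from the removed test):

    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    model.config.pad_token_id = None                 # the model does not define a pad token...
    model.generation_config.pad_token_id = None
    tokenizer.pad_token_id = tokenizer.eos_token_id  # ...but the tokenizer does

    llm = pipeline(task="text-generation", model=model, tokenizer=tokenizer, framework="pt")
    # The pad token is picked up once at construction time and forwarded to every `generate` call,
    # so generation no longer has to warn about falling back to the EOS token for padding.
    print(llm._forward_params.get("pad_token_id") == tokenizer.pad_token_id)  # expected: True
    _ = llm("The capital of France ", max_new_tokens=5)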