From 1739434eda78e5dd6323c52cc0e602186b34f107 Mon Sep 17 00:00:00 2001 From: yonigozlan Date: Fri, 20 Sep 2024 02:56:56 +0000 Subject: [PATCH] Remove unnecessary test override --- .../models/altclip/test_processor_altclip.py | 115 +-------------- .../test_processor_chinese_clip.py | 126 ---------------- .../models/pixtral/test_processor_pixtral.py | 137 ++---------------- tests/test_processing_common.py | 12 +- 4 files changed, 17 insertions(+), 373 deletions(-) diff --git a/tests/models/altclip/test_processor_altclip.py b/tests/models/altclip/test_processor_altclip.py index 1aca2280969404..33bff9c77ad263 100644 --- a/tests/models/altclip/test_processor_altclip.py +++ b/tests/models/altclip/test_processor_altclip.py @@ -18,7 +18,7 @@ import unittest from transformers import XLMRobertaTokenizer, XLMRobertaTokenizerFast -from transformers.testing_utils import require_torch, require_vision +from transformers.testing_utils import require_vision from transformers.utils import is_vision_available from ...test_processing_common import ProcessorTesterMixin @@ -50,116 +50,3 @@ def get_rust_tokenizer(self, **kwargs): def get_image_processor(self, **kwargs): return CLIPImageProcessor.from_pretrained(self.model_id, **kwargs) - - @require_torch - @require_vision - def test_unstructured_kwargs_batched(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = ["lower newer", "upper older longer string"] - image_input = self.prepare_image_inputs() * 2 - inputs = processor( - text=input_str, - images=image_input, - return_tensors="pt", - crop_size={"height": 214, "width": 214}, - padding="longest", - max_length=76, - ) - self.assertEqual(inputs["pixel_values"].shape[2], 214) - - self.assertEqual(len(inputs["input_ids"][0]), 7) - - def test_structured_kwargs_nested(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - # Define the kwargs for each modality - all_kwargs = { - "common_kwargs": {"return_tensors": "pt"}, - "images_kwargs": {"crop_size": {"height": 214, "width": 214}}, - "text_kwargs": {"padding": "max_length", "max_length": 76}, - } - - inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.skip_processor_without_typed_kwargs(processor) - - self.assertEqual(inputs["pixel_values"].shape[2], 214) - - self.assertEqual(len(inputs["input_ids"][0]), 76) - - def test_structured_kwargs_nested_from_dict(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - # Define the kwargs for each modality - all_kwargs = { - "common_kwargs": {"return_tensors": "pt"}, - "images_kwargs": {"crop_size": {"height": 214, "width": 214}}, - "text_kwargs": {"padding": "max_length", "max_length": 76}, - } - - inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.assertEqual(inputs["pixel_values"].shape[2], 214) - - self.assertEqual(len(inputs["input_ids"][0]), 76) - - def test_unstructured_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - inputs = processor( - text=input_str, - images=image_input, - return_tensors="pt", - crop_size={"height": 214, "width": 214}, - padding="max_length", - max_length=76, - ) - - self.assertEqual(inputs["pixel_values"].shape[2], 214) - self.assertEqual(len(inputs["input_ids"][0]), 76) - - def test_image_processor_defaults_preserved_by_image_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor", crop_size=(234, 234)) - tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - inputs = processor(text=input_str, images=image_input) - self.assertEqual(len(inputs["pixel_values"][0][0]), 234) diff --git a/tests/models/chinese_clip/test_processor_chinese_clip.py b/tests/models/chinese_clip/test_processor_chinese_clip.py index 5b191ce2df0894..e433c38f789104 100644 --- a/tests/models/chinese_clip/test_processor_chinese_clip.py +++ b/tests/models/chinese_clip/test_processor_chinese_clip.py @@ -206,129 +206,3 @@ def test_model_input_names(self): inputs = processor(text=input_str, images=image_input) self.assertListEqual(list(inputs.keys()), processor.model_input_names) - - def test_unstructured_kwargs_batched(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = ["lower newer", "upper older longer string"] - image_input = self.prepare_image_inputs() * 2 - inputs = processor( - text=input_str, - images=image_input, - return_tensors="pt", - crop_size={"height": 214, "width": 214}, - padding="longest", - max_length=76, - ) - self.assertEqual(inputs["pixel_values"].shape[2], 214) - - self.assertEqual(len(inputs["input_ids"][0]), 6) - - def test_structured_kwargs_nested(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - # Define the kwargs for each modality - all_kwargs = { - "common_kwargs": {"return_tensors": "pt"}, - "images_kwargs": {"crop_size": {"height": 214, "width": 214}}, - "text_kwargs": {"padding": "max_length", "max_length": 76}, - } - - inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.skip_processor_without_typed_kwargs(processor) - - self.assertEqual(inputs["pixel_values"].shape[2], 214) - - self.assertEqual(len(inputs["input_ids"][0]), 76) - - def test_structured_kwargs_nested_from_dict(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - # Define the kwargs for each modality - all_kwargs = { - "common_kwargs": {"return_tensors": "pt"}, - "images_kwargs": {"crop_size": {"height": 214, "width": 214}}, - "text_kwargs": {"padding": "max_length", "max_length": 76}, - } - - inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.assertEqual(inputs["pixel_values"].shape[2], 214) - - self.assertEqual(len(inputs["input_ids"][0]), 76) - - def test_unstructured_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - inputs = processor( - text=input_str, - images=image_input, - return_tensors="pt", - crop_size={"height": 214, "width": 214}, - padding="max_length", - max_length=76, - ) - - self.assertEqual(inputs["pixel_values"].shape[2], 214) - self.assertEqual(len(inputs["input_ids"][0]), 76) - - def test_image_processor_defaults_preserved_by_image_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor", crop_size=(234, 234)) - tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - inputs = processor(text=input_str, images=image_input) - self.assertEqual(len(inputs["pixel_values"][0][0]), 234) - - def test_kwargs_overrides_default_image_processor_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor", crop_size=(234, 234)) - tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - inputs = processor(text=input_str, images=image_input, crop_size=[224, 224]) - self.assertEqual(len(inputs["pixel_values"][0][0]), 224) diff --git a/tests/models/pixtral/test_processor_pixtral.py b/tests/models/pixtral/test_processor_pixtral.py index 04aa3ee8a38b4e..29575b49367268 100644 --- a/tests/models/pixtral/test_processor_pixtral.py +++ b/tests/models/pixtral/test_processor_pixtral.py @@ -19,7 +19,6 @@ import torch from transformers.testing_utils import ( - require_torch, require_vision, ) from transformers.utils import is_vision_available @@ -248,144 +247,28 @@ def test_processor_with_multiple_images_multiple_lists(self): ) # fmt: on - # Override all tests requiring shape as returning tensor batches is not supported by PixtralProcessor - - @require_torch - @require_vision - def test_image_processor_defaults_preserved_by_image_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor", size={"height": 240, "width": 240}) - tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - inputs = processor(text=input_str, images=image_input) - # Added dimension by pixtral image processor - self.assertEqual(len(inputs["pixel_values"][0][0][0][0]), 240) - - @require_torch - @require_vision - def test_kwargs_overrides_default_image_processor_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor", size={"height": 400, "width": 400}) - tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - inputs = processor(text=input_str, images=image_input, size={"height": 240, "width": 240}) - self.assertEqual(len(inputs["pixel_values"][0][0][0][0]), 240) - - @require_torch - @require_vision - def test_structured_kwargs_nested(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - # Define the kwargs for each modality - all_kwargs = { - "common_kwargs": {"return_tensors": "pt"}, - "images_kwargs": {"size": {"height": 240, "width": 240}}, - "text_kwargs": {"padding": "max_length", "max_length": 76}, - } - - inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.skip_processor_without_typed_kwargs(processor) - - self.assertEqual(inputs["pixel_values"][0][0].shape[-1], 240) - - self.assertEqual(len(inputs["input_ids"][0]), 76) - - @require_torch - @require_vision - def test_structured_kwargs_nested_from_dict(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - input_str = "lower newer" - image_input = self.prepare_image_inputs() - - # Define the kwargs for each modality - all_kwargs = { - "common_kwargs": {"return_tensors": "pt"}, - "images_kwargs": {"size": {"height": 240, "width": 240}}, - "text_kwargs": {"padding": "max_length", "max_length": 76}, - } - - inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.assertEqual(inputs["pixel_values"][0][0].shape[-1], 240) - - self.assertEqual(len(inputs["input_ids"][0]), 76) - - @require_torch - @require_vision - def test_unstructured_kwargs(self): - if "image_processor" not in self.processor_class.attributes: - self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) - self.skip_processor_without_typed_kwargs(processor) - - input_str = "lower newer" - image_input = self.prepare_image_inputs() - inputs = processor( - text=input_str, - images=image_input, - return_tensors="pt", - size={"height": 240, "width": 240}, - padding="max_length", - max_length=76, - ) - - self.assertEqual(inputs["pixel_values"][0][0].shape[-1], 240) - self.assertEqual(len(inputs["input_ids"][0]), 76) - - @require_torch - @require_vision + # Override as PixtralProcessor needs nested images to work properly with batched inputs def test_unstructured_kwargs_batched(self): if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}") - image_processor = self.get_component("image_processor") - tokenizer = self.get_component("tokenizer") - - processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + processor_components = self.prepare_components() + processor = self.processor_class(**processor_components) self.skip_processor_without_typed_kwargs(processor) input_str = ["lower newer", "upper older longer string"] - # images needs to be nested to detect multiple prompts image_input = [self.prepare_image_inputs()] * 2 inputs = processor( text=input_str, images=image_input, return_tensors="pt", - size={"height": 240, "width": 240}, + do_rescale=True, + rescale_factor=-1, padding="longest", max_length=76, ) - self.assertEqual(inputs["pixel_values"][0][0].shape[-1], 240) - self.assertEqual(len(inputs["input_ids"][0]), 4) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) + self.assertTrue( + len(inputs[self.text_input_name][0]) == len(inputs[self.text_input_name][1]) + and len(inputs[self.text_input_name][1]) < 76 + ) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 8e58cb1c69080c..3e909f2eb42792 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -173,7 +173,7 @@ def test_image_processor_defaults_preserved_by_image_kwargs(self): image_input = self.prepare_image_inputs() inputs = processor(text=input_str, images=image_input, return_tensors="pt") - self.assertLessEqual(inputs[self.images_input_name].mean(), 0) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) def test_kwargs_overrides_default_tokenizer_kwargs(self): if "image_processor" not in self.processor_class.attributes: @@ -206,7 +206,7 @@ def test_kwargs_overrides_default_image_processor_kwargs(self): image_input = self.prepare_image_inputs() inputs = processor(text=input_str, images=image_input, do_rescale=True, rescale_factor=-1, return_tensors="pt") - self.assertLessEqual(inputs[self.images_input_name].mean(), 0) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) def test_unstructured_kwargs(self): if "image_processor" not in self.processor_class.attributes: @@ -227,7 +227,7 @@ def test_unstructured_kwargs(self): max_length=76, ) - self.assertLessEqual(inputs[self.images_input_name].mean(), 0) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) self.assertEqual(inputs[self.text_input_name].shape[-1], 76) def test_unstructured_kwargs_batched(self): @@ -249,7 +249,7 @@ def test_unstructured_kwargs_batched(self): max_length=76, ) - self.assertLessEqual(inputs[self.images_input_name].mean(), 0) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) self.assertTrue( len(inputs[self.text_input_name][0]) == len(inputs[self.text_input_name][1]) and len(inputs[self.text_input_name][1]) < 76 @@ -293,7 +293,7 @@ def test_structured_kwargs_nested(self): inputs = processor(text=input_str, images=image_input, **all_kwargs) self.skip_processor_without_typed_kwargs(processor) - self.assertLessEqual(inputs[self.images_input_name].mean(), 0) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) self.assertEqual(inputs[self.text_input_name].shape[-1], 76) def test_structured_kwargs_nested_from_dict(self): @@ -313,7 +313,7 @@ def test_structured_kwargs_nested_from_dict(self): } inputs = processor(text=input_str, images=image_input, **all_kwargs) - self.assertLessEqual(inputs[self.images_input_name].mean(), 0) + self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0) self.assertEqual(inputs[self.text_input_name].shape[-1], 76) # TODO: the same test, but for audio + text processors that have strong overlap in kwargs