From 4441d4054070b9643b29e27cb22645044031993c Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 19:07:50 +0100
Subject: [PATCH 01/11] Trigger UDOP tests

---
 src/transformers/models/udop/processing_udop.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/udop/processing_udop.py b/src/transformers/models/udop/processing_udop.py
index ddd5d484a98883..073b2b2ad3875c 100644
--- a/src/transformers/models/udop/processing_udop.py
+++ b/src/transformers/models/udop/processing_udop.py
@@ -129,6 +129,7 @@ def __call__(
             raise ValueError(
                 "You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True."
             )
+        test_val = 7  # Just doing this so UDOP gets tested
 
         if self.image_processor.apply_ocr and (word_labels is not None):
             raise ValueError(

From 75aaa0a3d7f9837d8c04b41c83fa705b1d3d49d1 Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 19:21:47 +0100
Subject: [PATCH 02/11] Try forcing dtype in LayoutLMV3

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index 6f16435c14dde3..a157c00823f5d8 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -322,7 +322,7 @@ def preprocess(
         )
 
         # All transformations expect numpy arrays.
-        images = [to_numpy_array(image) for image in images]
+        images = [to_numpy_array(image).astype(np.float32) for image in images]
 
         if is_scaled_image(images[0]) and do_rescale:
             logger.warning_once(

From 0bb721ec56c5a8a6e8f061bc2e7b6803a60e5446 Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 19:37:26 +0100
Subject: [PATCH 03/11] Do checks to see where uint8 is getting in

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index a157c00823f5d8..0803d689c09960 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -350,6 +350,10 @@ def preprocess(
                 for image in images
             ]
 
+        for image in images:
+            if image.dtype == np.uint8:
+                raise ValueError("Aaaaa!")
+
         if do_rescale:
             images = [
                 self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

From 242a6804386a563c1794177246e4086df94d1fc3 Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 19:41:32 +0100
Subject: [PATCH 04/11] Do checks to see where uint8 is getting in

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index 0803d689c09960..f9b6f931605e9b 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -344,6 +344,10 @@ def preprocess(
                 words_batch.append(words)
                 boxes_batch.append(boxes)
 
+        for image in images:
+            if image.dtype == np.uint8:
+                raise ValueError("Aaaaa!")
+
         if do_resize:
             images = [
                 self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
@@ -353,7 +357,7 @@ def preprocess(
         for image in images:
             if image.dtype == np.uint8:
-                raise ValueError("Aaaaa!")
+                raise ValueError("Bbbbbb!")
 
         if do_rescale:
             images = [
                 self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

From 3674df57001345635d20dfee9fb77a3fa922f81a Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 19:47:15 +0100
Subject: [PATCH 05/11] Found it!

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index f9b6f931605e9b..3a8be1210ab57f 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -322,7 +322,7 @@ def preprocess(
         )
 
         # All transformations expect numpy arrays.
-        images = [to_numpy_array(image).astype(np.float32) for image in images]
+        images = [to_numpy_array(image) for image in images]
 
         if is_scaled_image(images[0]) and do_rescale:
             logger.warning_once(
@@ -344,23 +344,15 @@ def preprocess(
                 words_batch.append(words)
                 boxes_batch.append(boxes)
 
-        for image in images:
-            if image.dtype == np.uint8:
-                raise ValueError("Aaaaa!")
-
         if do_resize:
             images = [
                 self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
                 for image in images
             ]
 
-        for image in images:
-            if image.dtype == np.uint8:
-                raise ValueError("Bbbbbb!")
-
         if do_rescale:
             images = [
-                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
+                self.rescale(image=image.astype(np.float32), scale=rescale_factor, input_data_format=input_data_format)
                 for image in images
             ]
 

From f0fa8f83eed25a1a1689d99e047895dd55ec13b0 Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 19:52:23 +0100
Subject: [PATCH 06/11] Add .astype(np.float32)

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index 3a8be1210ab57f..7509d185eba67d 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -346,13 +346,13 @@ def preprocess(
 
         if do_resize:
             images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
+                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format).astype(np.float32)
                 for image in images
             ]
 
         if do_rescale:
            images = [
-                self.rescale(image=image.astype(np.float32), scale=rescale_factor, input_data_format=input_data_format)
+                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
                 for image in images
             ]
 

From d1f432fb3e149688fe87d991076c462c642ed41a Mon Sep 17 00:00:00 2001
From: Matt
Date: Tue, 15 Oct 2024 20:19:34 +0100
Subject: [PATCH 07/11] Remove forced check, make fixup

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 4 +++-
 src/transformers/models/udop/processing_udop.py      | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index 7509d185eba67d..ebe85147a898ef 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -346,7 +346,9 @@ def preprocess(
 
         if do_resize:
             images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format).astype(np.float32)
+                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format).astype(
+                    np.float32
+                )
                 for image in images
             ]
 
diff --git a/src/transformers/models/udop/processing_udop.py b/src/transformers/models/udop/processing_udop.py
index 073b2b2ad3875c..ddd5d484a98883 100644
--- a/src/transformers/models/udop/processing_udop.py
+++ b/src/transformers/models/udop/processing_udop.py
@@ -129,7 +129,6 @@ def __call__(
             raise ValueError(
                 "You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True."
             )
-        test_val = 7  # Just doing this so UDOP gets tested
 
         if self.image_processor.apply_ocr and (word_labels is not None):
             raise ValueError(

From 4f855d344e6973f13e11f28ab2e784a572fa59a7 Mon Sep 17 00:00:00 2001
From: Matt
Date: Fri, 18 Oct 2024 13:12:08 +0100
Subject: [PATCH 08/11] Checking where exactly the uint8 creeps in

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index ebe85147a898ef..edfe4c34467864 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -344,6 +344,10 @@ def preprocess(
                 words_batch.append(words)
                 boxes_batch.append(boxes)
 
+        for image in images:
+            if image.dtype == np.uint8:
+                raise ValueError("Aaaaa!")
+
         if do_resize:
             images = [
                 self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format).astype(

From d7f0720e33bfac6a1c68d06a2893eed305ba8a08 Mon Sep 17 00:00:00 2001
From: Matt
Date: Fri, 18 Oct 2024 13:22:11 +0100
Subject: [PATCH 09/11] More checking on the uint8 issues

---
 .../models/layoutlmv3/image_processing_layoutlmv3.py | 8 +-------
 src/transformers/models/udop/processing_udop.py      | 2 ++
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index edfe4c34467864..6f16435c14dde3 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -344,15 +344,9 @@ def preprocess(
                 words_batch.append(words)
                 boxes_batch.append(boxes)
 
-        for image in images:
-            if image.dtype == np.uint8:
-                raise ValueError("Aaaaa!")
-
         if do_resize:
             images = [
-                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format).astype(
-                    np.float32
-                )
+                self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
                 for image in images
             ]
 
diff --git a/src/transformers/models/udop/processing_udop.py b/src/transformers/models/udop/processing_udop.py
index ddd5d484a98883..1c71ffd66a95ce 100644
--- a/src/transformers/models/udop/processing_udop.py
+++ b/src/transformers/models/udop/processing_udop.py
@@ -118,6 +118,8 @@ def __call__(
             **self.prepare_and_validate_optional_call_args(*args),
         )
 
+        test_val = 5  # Just triggering the UDOP CI
+
         boxes = output_kwargs["text_kwargs"].pop("boxes", None)
         word_labels = output_kwargs["text_kwargs"].pop("word_labels", None)
         text_pair = output_kwargs["text_kwargs"].pop("text_pair", None)

From 1c40330e09539cfce5d696d05a260b72c6ffe7f5 Mon Sep 17 00:00:00 2001
From: Matt
Date: Fri, 18 Oct 2024 13:37:53 +0100
Subject: [PATCH 10/11] Manually upcast in rescale()

---
 src/transformers/image_transforms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/image_transforms.py b/src/transformers/image_transforms.py
index 4fef6012012f36..81e8d9185623aa 100644
--- a/src/transformers/image_transforms.py
+++ b/src/transformers/image_transforms.py
@@ -123,11 +123,11 @@ def rescale(
     if not isinstance(image, np.ndarray):
         raise TypeError(f"Input image must be of type np.ndarray, got {type(image)}")
 
-    rescaled_image = image * scale
+    rescaled_image = image.astype(np.float64) * scale  # Numpy type promotion has changed, so always upcast first
     if data_format is not None:
         rescaled_image = to_channel_dimension_format(rescaled_image, data_format, input_data_format)
 
-    rescaled_image = rescaled_image.astype(dtype)
+    rescaled_image = rescaled_image.astype(dtype)  # Finally downcast to the desired dtype at the end
 
     return rescaled_image
 

From 8cfa5d055750357c44c2b7d273fe73f32c3aa125 Mon Sep 17 00:00:00 2001
From: Matt
Date: Fri, 18 Oct 2024 13:38:37 +0100
Subject: [PATCH 11/11] Remove UDOP trigger

---
 src/transformers/models/udop/processing_udop.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/transformers/models/udop/processing_udop.py b/src/transformers/models/udop/processing_udop.py
index 1c71ffd66a95ce..ddd5d484a98883 100644
--- a/src/transformers/models/udop/processing_udop.py
+++ b/src/transformers/models/udop/processing_udop.py
@@ -118,8 +118,6 @@ def __call__(
             **self.prepare_and_validate_optional_call_args(*args),
         )
 
-        test_val = 5  # Just triggering the UDOP CI
-
         boxes = output_kwargs["text_kwargs"].pop("boxes", None)
         word_labels = output_kwargs["text_kwargs"].pop("word_labels", None)
         text_pair = output_kwargs["text_kwargs"].pop("text_pair", None)
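
A minimal, standalone sketch of the upcast-then-downcast pattern that PATCH 10/11 applies inside rescale(). The helper name, the sample array, and the 1/255 scale below are illustrative only and not part of the transformers API; the dtype produced by the implicit multiplication depends on which promotion rules the installed NumPy version uses, which is why the sketch prints it rather than asserting a value.

    import numpy as np

    def rescale_sketch(image, scale, dtype=np.float32):
        # Illustrative helper, not the transformers implementation: upcast explicitly
        # before multiplying so the result does not depend on NumPy's promotion rules,
        # then downcast once at the end to the requested dtype.
        rescaled = image.astype(np.float64) * scale
        return rescaled.astype(dtype)

    # A uint8 array standing in for a decoded image.
    image = np.arange(6, dtype=np.uint8).reshape(2, 3)

    implicit = image * (1 / 255)               # dtype depends on the NumPy version's promotion rules
    explicit = rescale_sketch(image, 1 / 255)  # always float32

    print(implicit.dtype, explicit.dtype)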