Commit 5269718

Don't use LayoutLMv2 and LayoutLMv3 in some pipeline tests (#22774)
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
ydshieh authored Apr 17, 2023
1 parent ea7b0a5 commit 5269718
Showing 6 changed files with 39 additions and 26 deletions.
22 changes: 0 additions & 22 deletions tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -273,33 +273,11 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         {
             "document-question-answering": LayoutLMv2ForQuestionAnswering,
             "feature-extraction": LayoutLMv2Model,
-            "question-answering": LayoutLMv2ForQuestionAnswering,
-            "text-classification": LayoutLMv2ForSequenceClassification,
-            "token-classification": LayoutLMv2ForTokenClassification,
-            "zero-shot": LayoutLMv2ForSequenceClassification,
         }
         if is_torch_available()
         else {}
     )
 
-    # TODO: Fix the failed tests
-    def is_pipeline_test_to_skip(
-        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
-    ):
-        if pipeline_test_casse_name in [
-            "QAPipelineTests",
-            "TextClassificationPipelineTests",
-            "TokenClassificationPipelineTests",
-            "ZeroShotClassificationPipelineTests",
-        ]:
-            # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
-            # config. With new tiny model creation, it is available, but we need to fix the failed tests.
-            return True
-
-        return super().is_pipeline_test_to_skip(
-            pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
-        )
-
     def setUp(self):
         self.model_tester = LayoutLMv2ModelTester(self)
         self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37)
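For context, the hook deleted above is what PipelineTesterMixin consults before running each auto-generated pipeline test. A simplified sketch of that mechanism follows; it is not the mixin's actual implementation, and the runner method name is hypothetical:

# Simplified sketch; the real PipelineTesterMixin differs in detail.
class PipelineTesterMixin:
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        # Default: no test is skipped. Model test classes override this.
        return False

    # Hypothetical name for the per-test runner; assumes the concrete test
    # class also inherits unittest.TestCase, so self.skipTest exists.
    def run_one_pipeline_test(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if self.is_pipeline_test_to_skip(
            pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            self.skipTest(f"{pipeline_test_casse_name} skipped for {config_class.__name__}")
        # ... otherwise build the tiny model and run the shared pipeline checks ...

This commit replaces that per-model opt-out with filtering on the pipeline-test side, so the LayoutLM tests are never generated in the first place (see the _TO_SKIP blocks below).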
8 changes: 4 additions & 4 deletions tests/models/layoutlmv3/test_modeling_layoutlmv3.py
@@ -289,10 +289,6 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         {
             "document-question-answering": LayoutLMv3ForQuestionAnswering,
             "feature-extraction": LayoutLMv3Model,
-            "question-answering": LayoutLMv3ForQuestionAnswering,
-            "text-classification": LayoutLMv3ForSequenceClassification,
-            "token-classification": LayoutLMv3ForTokenClassification,
-            "zero-shot": LayoutLMv3ForSequenceClassification,
         }
         if is_torch_available()
         else {}
@@ -302,6 +298,10 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
+        # `DocumentQuestionAnsweringPipeline` is expected to work with this model, but it combines the text and visual
+        # embeddings along the sequence dimension (dim 1), which causes an error during post-processing as `p_mask` has
+        # the sequence dimension of the text embedding only.
+        # (see the line `embedding_output = torch.cat([embedding_output, visual_embeddings], dim=1)`)
         return True
 
     def setUp(self):
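The mismatch described in the new comment can be reproduced shape-wise with plain tensors. The sizes below (512 text tokens, 49 visual patches) are assumed for illustration only, not taken from the real config:

import torch

# Assumed sizes, for illustration only.
text_len, visual_len, hidden = 512, 49, 16
embedding_output = torch.zeros(1, text_len, hidden)      # text embeddings
visual_embeddings = torch.zeros(1, visual_len, hidden)   # visual patch embeddings

# LayoutLMv3 concatenates the visual embeddings onto the text embeddings
# along the sequence dimension (dim 1), as quoted in the comment above:
embedding_output = torch.cat([embedding_output, visual_embeddings], dim=1)
print(embedding_output.shape)  # torch.Size([1, 561, 16])

# The QA pipeline builds `p_mask` from the tokenizer output, i.e. from the
# 512 text positions only:
p_mask = torch.zeros(1, text_len)

# The start/end logits returned by the model cover all 561 positions, so
# post-processing cannot apply the shorter mask to them:
start_logits = torch.zeros(1, text_len + visual_len)
assert start_logits.shape[1] != p_mask.shape[1]  # 561 vs. 512

Until the pipeline builds p_mask over the full multimodal sequence, skipping these tests is the pragmatic fix.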
9 changes: 9 additions & 0 deletions tests/pipelines/test_pipelines_question_answering.py
@@ -34,11 +34,20 @@
 from .test_pipelines_common import ANY
 
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
+
 @is_pipeline_test
 class QAPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
     tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     def get_test_pipeline(self, model, tokenizer, processor):
         if isinstance(model.config, LxmertConfig):
             # This is a bimodal model, we need to find a more consistent way
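The class-level filter added above can be seen in isolation with stand-in config classes (the classes and model names below are placeholders, not the real transformers objects):

# Stand-in config classes; only the class names matter to the filter.
class BertConfig: ...
class LayoutLMv2Config: ...
class LayoutLMv3Config: ...

_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}

model_mapping = {
    BertConfig: "BertForQuestionAnswering",
    LayoutLMv2Config: "LayoutLMv2ForQuestionAnswering",
    LayoutLMv3Config: "LayoutLMv3ForQuestionAnswering",
}

# Keep only configs whose class name is NOT in `_TO_SKIP`:
model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}

assert list(model_mapping) == [BertConfig]  # only the text-only model remains

Because the mapping is pruned at class-definition time, the parametrized pipeline tests for the two LayoutLM configs are never generated, which is lighter than skipping them one by one via is_pipeline_test_to_skip. The same pattern is repeated in the three pipeline test files below.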
9 changes: 9 additions & 0 deletions tests/pipelines/test_pipelines_text_classification.py
@@ -25,11 +25,20 @@
 from .test_pipelines_common import ANY
 
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
+
 @is_pipeline_test
 class TextClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     @require_torch
     def test_small_model_pt(self):
         text_classifier = pipeline(
8 changes: 8 additions & 0 deletions tests/pipelines/test_pipelines_token_classification.py
@@ -39,12 +39,20 @@

 VALID_INPUTS = ["A simple string", ["list of strings", "A simple string that is quite a bit longer"]]
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
 
 @is_pipeline_test
 class TokenClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     def get_test_pipeline(self, model, tokenizer, processor):
         token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
         return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
9 changes: 9 additions & 0 deletions tests/pipelines/test_pipelines_zero_shot.py
@@ -26,11 +26,20 @@
 from .test_pipelines_common import ANY
 
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
+
 @is_pipeline_test
 class ZeroShotClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     def get_test_pipeline(self, model, tokenizer, processor):
         classifier = ZeroShotClassificationPipeline(
             model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
