Commit 5269718

Don't use LayoutLMv2 and LayoutLMv3 in some pipeline tests (#22774)
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
ydshieh authored Apr 17, 2023
1 parent ea7b0a5 commit 5269718
Showing 6 changed files with 39 additions and 26 deletions.
22 changes: 0 additions & 22 deletions tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -273,33 +273,11 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         {
             "document-question-answering": LayoutLMv2ForQuestionAnswering,
             "feature-extraction": LayoutLMv2Model,
-            "question-answering": LayoutLMv2ForQuestionAnswering,
-            "text-classification": LayoutLMv2ForSequenceClassification,
-            "token-classification": LayoutLMv2ForTokenClassification,
-            "zero-shot": LayoutLMv2ForSequenceClassification,
         }
         if is_torch_available()
         else {}
     )
 
-    # TODO: Fix the failed tests
-    def is_pipeline_test_to_skip(
-        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
-    ):
-        if pipeline_test_casse_name in [
-            "QAPipelineTests",
-            "TextClassificationPipelineTests",
-            "TokenClassificationPipelineTests",
-            "ZeroShotClassificationPipelineTests",
-        ]:
-            # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
-            # config. With new tiny model creation, it is available, but we need to fix the failed tests.
-            return True
-
-        return super().is_pipeline_test_to_skip(
-            pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
-        )
-
     def setUp(self):
         self.model_tester = LayoutLMv2ModelTester(self)
         self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37)
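For context, the hook deleted above is what PipelineTesterMixin consults before running each auto-generated pipeline test. A simplified sketch of that mechanism follows; it is not the mixin's actual implementation, and the runner method name is hypothetical:

# Simplified sketch; the real PipelineTesterMixin differs in detail.
class PipelineTesterMixin:
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        # Default: no test is skipped. Model test classes override this.
        return False

    # Hypothetical name for the per-test runner; assumes the concrete test
    # class also inherits unittest.TestCase, so self.skipTest exists.
    def run_one_pipeline_test(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if self.is_pipeline_test_to_skip(
            pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            self.skipTest(f"{pipeline_test_casse_name} skipped for {config_class.__name__}")
        # ... otherwise build the tiny model and run the shared pipeline checks ...

This commit replaces that per-model opt-out with filtering on the pipeline-test side, so the LayoutLM tests are never generated in the first place (see the _TO_SKIP blocks below).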
8 changes: 4 additions & 4 deletions tests/models/layoutlmv3/test_modeling_layoutlmv3.py
@@ -289,10 +289,6 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         {
             "document-question-answering": LayoutLMv3ForQuestionAnswering,
             "feature-extraction": LayoutLMv3Model,
-            "question-answering": LayoutLMv3ForQuestionAnswering,
-            "text-classification": LayoutLMv3ForSequenceClassification,
-            "token-classification": LayoutLMv3ForTokenClassification,
-            "zero-shot": LayoutLMv3ForSequenceClassification,
         }
         if is_torch_available()
         else {}
@@ -302,6 +298,10 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
+        # `DocumentQuestionAnsweringPipeline` is expected to work with this model, but it combines the text and visual
+        # embeddings along the sequence dimension (dim 1), which causes an error during post-processing as `p_mask` has
+        # the sequence dimension of the text embedding only.
+        # (see the line `embedding_output = torch.cat([embedding_output, visual_embeddings], dim=1)`)
         return True
 
     def setUp(self):
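The mismatch described in the new comment can be reproduced shape-wise with plain tensors. The sizes below (512 text tokens, 49 visual patches) are assumed for illustration only, not taken from the real config:

import torch

# Assumed sizes, for illustration only.
text_len, visual_len, hidden = 512, 49, 16
embedding_output = torch.zeros(1, text_len, hidden)      # text embeddings
visual_embeddings = torch.zeros(1, visual_len, hidden)   # visual patch embeddings

# LayoutLMv3 concatenates the visual embeddings onto the text embeddings
# along the sequence dimension (dim 1), as quoted in the comment above:
embedding_output = torch.cat([embedding_output, visual_embeddings], dim=1)
print(embedding_output.shape)  # torch.Size([1, 561, 16])

# The QA pipeline builds `p_mask` from the tokenizer output, i.e. from the
# 512 text positions only:
p_mask = torch.zeros(1, text_len)

# The start/end logits returned by the model cover all 561 positions, so
# post-processing cannot apply the shorter mask to them:
start_logits = torch.zeros(1, text_len + visual_len)
assert start_logits.shape[1] != p_mask.shape[1]  # 561 vs. 512

Until the pipeline builds p_mask over the full multimodal sequence, skipping these tests is the pragmatic fix.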
9 changes: 9 additions & 0 deletions tests/pipelines/test_pipelines_question_answering.py
@@ -34,11 +34,20 @@
 from .test_pipelines_common import ANY
 
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
+
 @is_pipeline_test
 class QAPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
     tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     def get_test_pipeline(self, model, tokenizer, processor):
         if isinstance(model.config, LxmertConfig):
             # This is a bimodal model, we need to find a more consistent way
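The class-level filter added above can be seen in isolation with stand-in config classes (the classes and model names below are placeholders, not the real transformers objects):

# Stand-in config classes; only the class names matter to the filter.
class BertConfig: ...
class LayoutLMv2Config: ...
class LayoutLMv3Config: ...

_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}

model_mapping = {
    BertConfig: "BertForQuestionAnswering",
    LayoutLMv2Config: "LayoutLMv2ForQuestionAnswering",
    LayoutLMv3Config: "LayoutLMv3ForQuestionAnswering",
}

# Keep only configs whose class name is NOT in `_TO_SKIP`:
model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}

assert list(model_mapping) == [BertConfig]  # only the text-only model remains

Because the mapping is pruned at class-definition time, the parametrized pipeline tests for the two LayoutLM configs are never generated, which is lighter than skipping them one by one via is_pipeline_test_to_skip. The same pattern is repeated in the three pipeline test files below.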
9 changes: 9 additions & 0 deletions tests/pipelines/test_pipelines_text_classification.py
@@ -25,11 +25,20 @@
 from .test_pipelines_common import ANY
 
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
+
 @is_pipeline_test
 class TextClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     @require_torch
     def test_small_model_pt(self):
         text_classifier = pipeline(
8 changes: 8 additions & 0 deletions tests/pipelines/test_pipelines_token_classification.py
@@ -39,12 +39,20 @@

 VALID_INPUTS = ["A simple string", ["list of strings", "A simple string that is quite a bit longer"]]
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
 
 @is_pipeline_test
 class TokenClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     def get_test_pipeline(self, model, tokenizer, processor):
         token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
         return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
9 changes: 9 additions & 0 deletions tests/pipelines/test_pipelines_zero_shot.py
@@ -26,11 +26,20 @@
 from .test_pipelines_common import ANY
 
 
+# These 2 model types require different inputs than those of the usual text models.
+_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
+
+
 @is_pipeline_test
 class ZeroShotClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
+    if model_mapping is not None:
+        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
+    if tf_model_mapping is not None:
+        tf_model_mapping = {config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP}
+
     def get_test_pipeline(self, model, tokenizer, processor):
         classifier = ZeroShotClassificationPipeline(
             model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
