Skip to content
This repository has been archived by the owner on Jan 31, 2024. It is now read-only.

Add document token classification pipeline #1

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/source/en/main_classes/pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,12 @@ Pipelines available for multimodal tasks include the following.
- __call__
- all

### DocumentTokenClassificationPipeline

[[autodoc]] DocumentTokenClassificationPipeline
- __call__
- all

### FeatureExtractionPipeline

[[autodoc]] FeatureExtractionPipeline
Expand Down
8 changes: 8 additions & 0 deletions docs/source/en/model_doc/auto.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,14 @@ The following auto classes are available for the following multimodal tasks.

[[autodoc]] TFAutoModelForDocumentQuestionAnswering

### AutoModelForDocumentTokenClassification

[[autodoc]] AutoModelForDocumentTokenClassification

### TFAutoModelForDocumentTokenClassification

[[autodoc]] TFAutoModelForDocumentTokenClassification

### AutoModelForVisualQuestionAnswering

[[autodoc]] AutoModelForVisualQuestionAnswering
Expand Down
10 changes: 10 additions & 0 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@
"CsvPipelineDataFormat",
"DepthEstimationPipeline",
"DocumentQuestionAnsweringPipeline",
"DocumentTokenClassificationPipeline",
"FeatureExtractionPipeline",
"FillMaskPipeline",
"ImageClassificationPipeline",
Expand Down Expand Up @@ -938,6 +939,7 @@
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
Expand Down Expand Up @@ -970,6 +972,7 @@
"AutoModelForCTC",
"AutoModelForDepthEstimation",
"AutoModelForDocumentQuestionAnswering",
"AutoModelForDocumentTokenClassification",
"AutoModelForImageClassification",
"AutoModelForImageSegmentation",
"AutoModelForInstanceSegmentation",
Expand Down Expand Up @@ -2531,6 +2534,7 @@
[
"TF_MODEL_FOR_CAUSAL_LM_MAPPING",
"TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
"TF_MODEL_FOR_MASKED_LM_MAPPING",
Expand All @@ -2550,6 +2554,7 @@
"TFAutoModel",
"TFAutoModelForCausalLM",
"TFAutoModelForDocumentQuestionAnswering",
"TFAutoModelForDocumentTokenClassification",
"TFAutoModelForImageClassification",
"TFAutoModelForMaskedLM",
"TFAutoModelForMultipleChoice",
Expand Down Expand Up @@ -3796,6 +3801,7 @@
CsvPipelineDataFormat,
DepthEstimationPipeline,
DocumentQuestionAnsweringPipeline,
DocumentTokenClassificationPipeline,
FeatureExtractionPipeline,
FillMaskPipeline,
ImageClassificationPipeline,
Expand Down Expand Up @@ -4183,6 +4189,7 @@
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -4215,6 +4222,7 @@
AutoModelForCTC,
AutoModelForDepthEstimation,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
Expand Down Expand Up @@ -5497,6 +5505,7 @@
from .models.auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
Expand All @@ -5516,6 +5525,7 @@
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForDocumentQuestionAnswering,
TFAutoModelForDocumentTokenClassification,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForMultipleChoice,
Expand Down
8 changes: 8 additions & 0 deletions src/transformers/models/auto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"MODEL_FOR_CAUSAL_LM_MAPPING",
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
Expand Down Expand Up @@ -103,6 +104,7 @@
"AutoModelForVision2Seq",
"AutoModelForVisualQuestionAnswering",
"AutoModelForDocumentQuestionAnswering",
"AutoModelForDocumentTokenClassification",
"AutoModelWithLMHead",
"AutoModelForZeroShotObjectDetection",
]
Expand All @@ -123,6 +125,7 @@
"TF_MODEL_FOR_PRETRAINING_MAPPING",
"TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING",
"TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
"TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
Expand All @@ -140,6 +143,7 @@
"TFAutoModelForNextSentencePrediction",
"TFAutoModelForPreTraining",
"TFAutoModelForDocumentQuestionAnswering",
"TFAutoModelForDocumentTokenClassification",
"TFAutoModelForQuestionAnswering",
"TFAutoModelForSemanticSegmentation",
"TFAutoModelForSeq2SeqLM",
Expand Down Expand Up @@ -208,6 +212,7 @@
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -240,6 +245,7 @@
AutoModelForCTC,
AutoModelForDepthEstimation,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
Expand Down Expand Up @@ -273,6 +279,7 @@
from .modeling_tf_auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
Expand All @@ -292,6 +299,7 @@
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForDocumentQuestionAnswering,
TFAutoModelForDocumentTokenClassification,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForMultipleChoice,
Expand Down
18 changes: 18 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,12 @@
]
)

# Name table for the document-token-classification auto class. Each entry pairs a string key with
# the name of a model class; this table is passed to _LazyAutoMapping together with CONFIG_MAPPING_NAMES.
# NOTE(review): keys are presumably the same model-type identifiers used in CONFIG_MAPPING_NAMES — confirm.
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
# Model for Document Token Classification mapping
("layoutlmv3", "LayoutLMv3ForTokenClassification"),
]
)

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
# Model for Token Classification mapping
Expand Down Expand Up @@ -926,6 +932,9 @@
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
)
# Mapping object built from CONFIG_MAPPING_NAMES and the document-token-classification name table.
# It is assigned to `_model_mapping` on AutoModelForDocumentTokenClassification.
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
Expand Down Expand Up @@ -1060,6 +1069,15 @@ class AutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3',
)

# Auto class for the "document token classification" head. Its only class attribute points
# `_model_mapping` at MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING.
class AutoModelForDocumentTokenClassification(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING

# Rebind the class name to the result of auto_class_update, passing the head description and an
# example checkpoint (presumably used to fill in generated documentation — confirm in auto_class_update).
AutoModelForDocumentTokenClassification = auto_class_update(
AutoModelForDocumentTokenClassification,
head_doc="document token classification",
# NOTE(review): the single-quoted literal embeds `", revision="` on purpose, mirroring the
# document-question-answering auto class. It presumably relies on the value being substituted
# between double quotes so that a `revision=` argument appears in the rendered example — confirm.
checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
)


class AutoModelForTokenClassification(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
Expand Down
16 changes: 16 additions & 0 deletions src/transformers/models/auto/modeling_tf_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,11 @@
]
)

# TensorFlow name table for the document-token-classification auto class. Each entry pairs a string
# key with the name of a TF model class; the table is passed to _LazyAutoMapping with CONFIG_MAPPING_NAMES.
# NOTE(review): keys are presumably the same model-type identifiers used in CONFIG_MAPPING_NAMES — confirm.
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
# Model for Document Token Classification mapping
("layoutlmv3", "TFLayoutLMv3ForTokenClassification"),
]
)

TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
[
Expand Down Expand Up @@ -442,6 +447,9 @@
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
)
# Mapping object built from CONFIG_MAPPING_NAMES and the TF document-token-classification name table.
# It is assigned to `_model_mapping` on TFAutoModelForDocumentTokenClassification.
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES
)
Expand Down Expand Up @@ -561,6 +569,14 @@ class TFAutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3',
)

# TensorFlow auto class for the "document token classification" head. Its only class attribute points
# `_model_mapping` at TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING.
class TFAutoModelForDocumentTokenClassification(_BaseAutoModelClass):
_model_mapping = TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING

# Rebind the class name to the result of auto_class_update, passing the head description and an
# example checkpoint (presumably used to fill in generated documentation — confirm in auto_class_update).
TFAutoModelForDocumentTokenClassification = auto_class_update(
TFAutoModelForDocumentTokenClassification,
head_doc="document token classification",
# NOTE(review): the single-quoted literal embeds `", revision="` on purpose, mirroring the
# TF document-question-answering auto class. It presumably relies on the value being substituted
# between double quotes so that a `revision=` argument appears in the rendered example — confirm.
checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
)

class TFAutoModelForTableQuestionAnswering(_BaseAutoModelClass):
_model_mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
Expand Down
14 changes: 14 additions & 0 deletions src/transformers/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .document_token_classification import DocumentTokenClassificationPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
Expand Down Expand Up @@ -123,6 +124,7 @@
AutoModelForCausalLM,
AutoModelForCTC,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForMaskedLM,
Expand Down Expand Up @@ -240,6 +242,18 @@
},
"type": "multimodal",
},
# Task-registry entry for "document-token-classification": pipeline implementation, per-framework
# model classes, default checkpoints and task type.
"document-token-classification": {
"impl": DocumentTokenClassificationPipeline,
# The PyTorch auto class is only listed when torch is available; otherwise the tuple is empty.
"pt": (AutoModelForDocumentTokenClassification,) if is_torch_available() else (),
# NOTE(review): no TensorFlow model class is registered here, yet a "tf" default checkpoint is
# listed below and this change set also adds TFAutoModelForDocumentTokenClassification. Either
# register the TF auto class (guarded by is_tf_available()) or drop the "tf" default — confirm intent.
"tf": (),
"default": {
"model": {
# Each value is a pair of strings: a checkpoint name and what looks like a revision hash.
# NOTE(review): "layoutlmv3-base" reads like a base checkpoint; confirm it ships a trained
# token-classification head, otherwise the default pipeline output may not be meaningful.
"pt": ("microsoft/layoutlmv3-base", "07c9b08"),
"tf": ("microsoft/layoutlmv3-base", "07c9b08"),
},
},
"type": "multimodal",
},
"fill-mask": {
"impl": FillMaskPipeline,
"tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
Expand Down
Loading