Skip to content
This repository has been archived by the owner on Jan 31, 2024. It is now read-only.

Add document token classification pipeline #1

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/source/en/main_classes/pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,12 @@ Pipelines available for multimodal tasks include the following.
- __call__
- all

### DocumentTokenClassificationPipeline

[[autodoc]] DocumentTokenClassificationPipeline
- __call__
- all

### FeatureExtractionPipeline

[[autodoc]] FeatureExtractionPipeline
Expand Down
8 changes: 8 additions & 0 deletions docs/source/en/model_doc/auto.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,14 @@ The following auto classes are available for the following multimodal tasks.

[[autodoc]] TFAutoModelForDocumentQuestionAnswering

### AutoModelForDocumentTokenClassification

[[autodoc]] AutoModelForDocumentTokenClassification

### TFAutoModelForDocumentTokenClassification

[[autodoc]] TFAutoModelForDocumentTokenClassification

### AutoModelForVisualQuestionAnswering

[[autodoc]] AutoModelForVisualQuestionAnswering
Expand Down
10 changes: 10 additions & 0 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@
"CsvPipelineDataFormat",
"DepthEstimationPipeline",
"DocumentQuestionAnsweringPipeline",
"DocumentTokenClassificationPipeline",
"FeatureExtractionPipeline",
"FillMaskPipeline",
"ImageClassificationPipeline",
Expand Down Expand Up @@ -938,6 +939,7 @@
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
Expand Down Expand Up @@ -970,6 +972,7 @@
"AutoModelForCTC",
"AutoModelForDepthEstimation",
"AutoModelForDocumentQuestionAnswering",
"AutoModelForDocumentTokenClassification",
"AutoModelForImageClassification",
"AutoModelForImageSegmentation",
"AutoModelForInstanceSegmentation",
Expand Down Expand Up @@ -2531,6 +2534,7 @@
[
"TF_MODEL_FOR_CAUSAL_LM_MAPPING",
"TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
"TF_MODEL_FOR_MASKED_LM_MAPPING",
Expand All @@ -2550,6 +2554,7 @@
"TFAutoModel",
"TFAutoModelForCausalLM",
"TFAutoModelForDocumentQuestionAnswering",
"TFAutoModelForDocumentTokenClassification",
"TFAutoModelForImageClassification",
"TFAutoModelForMaskedLM",
"TFAutoModelForMultipleChoice",
Expand Down Expand Up @@ -3796,6 +3801,7 @@
CsvPipelineDataFormat,
DepthEstimationPipeline,
DocumentQuestionAnsweringPipeline,
DocumentTokenClassificationPipeline,
FeatureExtractionPipeline,
FillMaskPipeline,
ImageClassificationPipeline,
Expand Down Expand Up @@ -4183,6 +4189,7 @@
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -4215,6 +4222,7 @@
AutoModelForCTC,
AutoModelForDepthEstimation,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
Expand Down Expand Up @@ -5497,6 +5505,7 @@
from .models.auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
Expand All @@ -5516,6 +5525,7 @@
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForDocumentQuestionAnswering,
TFAutoModelForDocumentTokenClassification,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForMultipleChoice,
Expand Down
8 changes: 8 additions & 0 deletions src/transformers/models/auto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"MODEL_FOR_CAUSAL_LM_MAPPING",
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
Expand Down Expand Up @@ -103,6 +104,7 @@
"AutoModelForVision2Seq",
"AutoModelForVisualQuestionAnswering",
"AutoModelForDocumentQuestionAnswering",
"AutoModelForDocumentTokenClassification",
"AutoModelWithLMHead",
"AutoModelForZeroShotObjectDetection",
]
Expand All @@ -123,6 +125,7 @@
"TF_MODEL_FOR_PRETRAINING_MAPPING",
"TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING",
"TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
"TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
Expand All @@ -140,6 +143,7 @@
"TFAutoModelForNextSentencePrediction",
"TFAutoModelForPreTraining",
"TFAutoModelForDocumentQuestionAnswering",
"TFAutoModelForDocumentTokenClassification",
"TFAutoModelForQuestionAnswering",
"TFAutoModelForSemanticSegmentation",
"TFAutoModelForSeq2SeqLM",
Expand Down Expand Up @@ -208,6 +212,7 @@
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -240,6 +245,7 @@
AutoModelForCTC,
AutoModelForDepthEstimation,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
Expand Down Expand Up @@ -273,6 +279,7 @@
from .modeling_tf_auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
Expand All @@ -292,6 +299,7 @@
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForDocumentQuestionAnswering,
TFAutoModelForDocumentTokenClassification,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForMultipleChoice,
Expand Down
18 changes: 18 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,12 @@
]
)

MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
("layoutlmv3", "LayoutLMv3ForTokenClassification"),
]
)

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
# Model for Token Classification mapping
Expand Down Expand Up @@ -926,6 +932,9 @@
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
)
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
Expand Down Expand Up @@ -1060,6 +1069,15 @@ class AutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3',
)

class AutoModelForDocumentTokenClassification(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING

AutoModelForDocumentTokenClassification = auto_class_update(
AutoModelForDocumentTokenClassification,
head_doc="document token classification",
checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
)


class AutoModelForTokenClassification(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
Expand Down
16 changes: 16 additions & 0 deletions src/transformers/models/auto/modeling_tf_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,11 @@
]
)

TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
("layoutlmv3", "TFLayoutLMv3ForTokenClassification"),
]
)

TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
[
Expand Down Expand Up @@ -442,6 +447,9 @@
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
)
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES
)
Expand Down Expand Up @@ -561,6 +569,14 @@ class TFAutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3',
)

class TFAutoModelForDocumentTokenClassification(_BaseAutoModelClass):
_model_mapping = TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING

TFAutoModelForDocumentTokenClassification = auto_class_update(
TFAutoModelForDocumentTokenClassification,
head_doc="document token classification",
checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
)

class TFAutoModelForTableQuestionAnswering(_BaseAutoModelClass):
_model_mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
Expand Down
14 changes: 14 additions & 0 deletions src/transformers/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .document_token_classification import DocumentTokenClassificationPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
Expand Down Expand Up @@ -123,6 +124,7 @@
AutoModelForCausalLM,
AutoModelForCTC,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForMaskedLM,
Expand Down Expand Up @@ -240,6 +242,18 @@
},
"type": "multimodal",
},
"document-token-classification": {
"impl": DocumentTokenClassificationPipeline,
"pt": (AutoModelForDocumentTokenClassification,) if is_torch_available() else (),
"tf": (),
"default": {
"model": {
"pt": ("microsoft/layoutlmv3-base", "07c9b08"),
"tf": ("microsoft/layoutlmv3-base", "07c9b08"),
},
},
"type": "multimodal",
},
"fill-mask": {
"impl": FillMaskPipeline,
"tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
Expand Down
Loading