Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add document token classification pipeline (#1) #21012

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/source/en/main_classes/pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,12 @@ Pipelines available for multimodal tasks include the following.
- __call__
- all

### DocumentTokenClassificationPipeline

[[autodoc]] DocumentTokenClassificationPipeline
- __call__
- all

### FeatureExtractionPipeline

[[autodoc]] FeatureExtractionPipeline
Expand Down
8 changes: 8 additions & 0 deletions docs/source/en/model_doc/auto.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,14 @@ The following auto classes are available for the following multimodal tasks.

[[autodoc]] TFAutoModelForDocumentQuestionAnswering

### AutoModelForDocumentTokenClassification

[[autodoc]] AutoModelForDocumentTokenClassification

### TFAutoModelForDocumentTokenClassification

[[autodoc]] TFAutoModelForDocumentTokenClassification

### AutoModelForVisualQuestionAnswering

[[autodoc]] AutoModelForVisualQuestionAnswering
Expand Down
10 changes: 10 additions & 0 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@
"CsvPipelineDataFormat",
"DepthEstimationPipeline",
"DocumentQuestionAnsweringPipeline",
"DocumentTokenClassificationPipeline",
"FeatureExtractionPipeline",
"FillMaskPipeline",
"ImageClassificationPipeline",
Expand Down Expand Up @@ -938,6 +939,7 @@
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
Expand Down Expand Up @@ -970,6 +972,7 @@
"AutoModelForCTC",
"AutoModelForDepthEstimation",
"AutoModelForDocumentQuestionAnswering",
"AutoModelForDocumentTokenClassification",
"AutoModelForImageClassification",
"AutoModelForImageSegmentation",
"AutoModelForInstanceSegmentation",
Expand Down Expand Up @@ -2531,6 +2534,7 @@
[
"TF_MODEL_FOR_CAUSAL_LM_MAPPING",
"TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
"TF_MODEL_FOR_MASKED_LM_MAPPING",
Expand All @@ -2550,6 +2554,7 @@
"TFAutoModel",
"TFAutoModelForCausalLM",
"TFAutoModelForDocumentQuestionAnswering",
"TFAutoModelForDocumentTokenClassification",
"TFAutoModelForImageClassification",
"TFAutoModelForMaskedLM",
"TFAutoModelForMultipleChoice",
Expand Down Expand Up @@ -3796,6 +3801,7 @@
CsvPipelineDataFormat,
DepthEstimationPipeline,
DocumentQuestionAnsweringPipeline,
DocumentTokenClassificationPipeline,
FeatureExtractionPipeline,
FillMaskPipeline,
ImageClassificationPipeline,
Expand Down Expand Up @@ -4183,6 +4189,7 @@
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -4215,6 +4222,7 @@
AutoModelForCTC,
AutoModelForDepthEstimation,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
Expand Down Expand Up @@ -5497,6 +5505,7 @@
from .models.auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
Expand All @@ -5516,6 +5525,7 @@
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForDocumentQuestionAnswering,
TFAutoModelForDocumentTokenClassification,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForMultipleChoice,
Expand Down
8 changes: 8 additions & 0 deletions src/transformers/models/auto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"MODEL_FOR_CAUSAL_LM_MAPPING",
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
Expand Down Expand Up @@ -103,6 +104,7 @@
"AutoModelForVision2Seq",
"AutoModelForVisualQuestionAnswering",
"AutoModelForDocumentQuestionAnswering",
"AutoModelForDocumentTokenClassification",
"AutoModelWithLMHead",
"AutoModelForZeroShotObjectDetection",
]
Expand All @@ -123,6 +125,7 @@
"TF_MODEL_FOR_PRETRAINING_MAPPING",
"TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
"TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
"TF_MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING",
"TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
"TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
Expand All @@ -140,6 +143,7 @@
"TFAutoModelForNextSentencePrediction",
"TFAutoModelForPreTraining",
"TFAutoModelForDocumentQuestionAnswering",
"TFAutoModelForDocumentTokenClassification",
"TFAutoModelForQuestionAnswering",
"TFAutoModelForSemanticSegmentation",
"TFAutoModelForSeq2SeqLM",
Expand Down Expand Up @@ -208,6 +212,7 @@
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
Expand Down Expand Up @@ -240,6 +245,7 @@
AutoModelForCTC,
AutoModelForDepthEstimation,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
Expand Down Expand Up @@ -273,6 +279,7 @@
from .modeling_tf_auto import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
Expand All @@ -292,6 +299,7 @@
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForDocumentQuestionAnswering,
TFAutoModelForDocumentTokenClassification,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForMultipleChoice,
Expand Down
20 changes: 20 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,12 @@
]
)

MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
    [
        # Model for Document Token Classification mapping
        ("layoutlmv3", "LayoutLMv3ForTokenClassification"),
    ]
)

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
[
# Model for Token Classification mapping
Expand Down Expand Up @@ -926,6 +932,9 @@
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
)
# Pairs CONFIG_MAPPING_NAMES with the document token classification model names
# through _LazyAutoMapping.
MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
    CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
Expand Down Expand Up @@ -1061,6 +1070,17 @@ class AutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
)


class AutoModelForDocumentTokenClassification(_BaseAutoModelClass):
    # Auto class whose model lookup uses the document token classification mapping.
    _model_mapping = MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING


# NOTE(review): the checkpoint string mixes quote styles; presumably so that it
# expands to `"microsoft/layoutlmv3-base", revision="07c9b08"` when
# auto_class_update places it between double quotes in the generated example.
# Confirm against auto_class_update before "fixing" the quoting.
AutoModelForDocumentTokenClassification = auto_class_update(
    AutoModelForDocumentTokenClassification,
    head_doc="document token classification",
    checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
)


class AutoModelForTokenClassification(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING

Expand Down
19 changes: 19 additions & 0 deletions src/transformers/models/auto/modeling_tf_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,11 @@
]
)

TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
    [
        # Model for Document Token Classification mapping
        ("layoutlmv3", "TFLayoutLMv3ForTokenClassification"),
    ]
)

TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
[
Expand Down Expand Up @@ -442,6 +447,9 @@
TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
)
# Pairs CONFIG_MAPPING_NAMES with the TF document token classification model
# names through _LazyAutoMapping.
TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
    CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES
)
Expand Down Expand Up @@ -562,6 +570,17 @@ class TFAutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
)


class TFAutoModelForDocumentTokenClassification(_BaseAutoModelClass):
    # Auto class whose model lookup uses the TF document token classification mapping.
    _model_mapping = TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING


# NOTE(review): the checkpoint string mixes quote styles; presumably so that it
# expands to `"microsoft/layoutlmv3-base", revision="07c9b08"` when
# auto_class_update places it between double quotes in the generated example.
# Confirm against auto_class_update before "fixing" the quoting.
TFAutoModelForDocumentTokenClassification = auto_class_update(
    TFAutoModelForDocumentTokenClassification,
    head_doc="document token classification",
    checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
)


class TFAutoModelForTableQuestionAnswering(_BaseAutoModelClass):
_model_mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING

Expand Down
14 changes: 14 additions & 0 deletions src/transformers/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .document_token_classification import DocumentTokenClassificationPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
Expand Down Expand Up @@ -123,6 +124,7 @@
AutoModelForCausalLM,
AutoModelForCTC,
AutoModelForDocumentQuestionAnswering,
AutoModelForDocumentTokenClassification,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForMaskedLM,
Expand Down Expand Up @@ -240,6 +242,18 @@
},
"type": "multimodal",
},
"document-token-classification": {
"impl": DocumentTokenClassificationPipeline,
"pt": (AutoModelForDocumentTokenClassification,) if is_torch_available() else (),
"tf": (),
"default": {
"model": {
"pt": ("microsoft/layoutlmv3-base", "07c9b08"),
"tf": ("microsoft/layoutlmv3-base", "07c9b08"),
},
},
"type": "multimodal",
},
"fill-mask": {
"impl": FillMaskPipeline,
"tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
Expand Down
Loading