loop-payments · vaishak2future · Jan 4, 2023 · Dec 29, 2022 · Dec 29, 2022 · Dec 29, 2022
diff --git a/docs/source/en/main_classes/pipelines.mdx b/docs/source/en/main_classes/pipelines.mdx
@@ -446,6 +446,12 @@ Pipelines available for multimodal tasks include the following.
  - __call__
  - all
 
+### DocumentTokenClassificationPipeline
+
+[[autodoc]] DocumentTokenClassificationPipeline
+ - __call__
+ - all
+
 ### FeatureExtractionPipeline
 
 [[autodoc]] FeatureExtractionPipeline

diff --git a/docs/source/en/model_doc/auto.mdx b/docs/source/en/model_doc/auto.mdx
@@ -310,6 +310,14 @@ The following auto classes are available for the following multimodal tasks.
 
 [[autodoc]] TFAutoModelForDocumentQuestionAnswering
 
+### AutoModelForDocumentTokenClassification
+
+[[autodoc]] AutoModelForDocumentTokenClassification
+
+### TFAutoModelForDocumentTokenClassification
+
+[[autodoc]] TFAutoModelForDocumentTokenClassification
+
 ### AutoModelForVisualQuestionAnswering
 
 [[autodoc]] AutoModelForVisualQuestionAnswering

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -481,6 +481,7 @@
  "CsvPipelineDataFormat",
  "DepthEstimationPipeline",
  "DocumentQuestionAnsweringPipeline",
+ "DocumentTokenClassificationPipeline",
  "FeatureExtractionPipeline",
  "FillMaskPipeline",
  "ImageClassificationPipeline",
@@ -938,6 +939,7 @@
  "MODEL_FOR_CTC_MAPPING",
  "MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
  "MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
+ "MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
  "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
  "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
  "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
@@ -970,6 +972,7 @@
  "AutoModelForCTC",
  "AutoModelForDepthEstimation",
  "AutoModelForDocumentQuestionAnswering",
+ "AutoModelForDocumentTokenClassification",
  "AutoModelForImageClassification",
  "AutoModelForImageSegmentation",
  "AutoModelForInstanceSegmentation",
@@ -2531,6 +2534,7 @@
  [
  "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
  "TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
+ "TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
  "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
  "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
  "TF_MODEL_FOR_MASKED_LM_MAPPING",
@@ -2550,6 +2554,7 @@
  "TFAutoModel",
  "TFAutoModelForCausalLM",
  "TFAutoModelForDocumentQuestionAnswering",
+ "TFAutoModelForDocumentTokenClassification",
  "TFAutoModelForImageClassification",
  "TFAutoModelForMaskedLM",
  "TFAutoModelForMultipleChoice",
@@ -3796,6 +3801,7 @@
  CsvPipelineDataFormat,
  DepthEstimationPipeline,
  DocumentQuestionAnsweringPipeline,
+ DocumentTokenClassificationPipeline,
  FeatureExtractionPipeline,
  FillMaskPipeline,
  ImageClassificationPipeline,
@@ -4183,6 +4189,7 @@
  MODEL_FOR_CTC_MAPPING,
  MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
  MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
+ MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
  MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
  MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
  MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
@@ -4215,6 +4222,7 @@
  AutoModelForCTC,
  AutoModelForDepthEstimation,
  AutoModelForDocumentQuestionAnswering,
+ AutoModelForDocumentTokenClassification,
  AutoModelForImageClassification,
  AutoModelForImageSegmentation,
  AutoModelForInstanceSegmentation,
@@ -5497,6 +5505,7 @@
  from .models.auto import (
  TF_MODEL_FOR_CAUSAL_LM_MAPPING,
  TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
+ TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
  TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
  TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
  TF_MODEL_FOR_MASKED_LM_MAPPING,
@@ -5516,6 +5525,7 @@
  TFAutoModel,
  TFAutoModelForCausalLM,
  TFAutoModelForDocumentQuestionAnswering,
+ TFAutoModelForDocumentTokenClassification,
  TFAutoModelForImageClassification,
  TFAutoModelForMaskedLM,
  TFAutoModelForMultipleChoice,

diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py
@@ -50,6 +50,7 @@
  "MODEL_FOR_CAUSAL_LM_MAPPING",
  "MODEL_FOR_CTC_MAPPING",
  "MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
+ "MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
  "MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
  "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
  "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
@@ -103,6 +104,7 @@
  "AutoModelForVision2Seq",
  "AutoModelForVisualQuestionAnswering",
  "AutoModelForDocumentQuestionAnswering",
+ "AutoModelForDocumentTokenClassification",
  "AutoModelWithLMHead",
  "AutoModelForZeroShotObjectDetection",
  ]
@@ -123,6 +125,7 @@
  "TF_MODEL_FOR_PRETRAINING_MAPPING",
  "TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING",
  "TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
+ "TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING",
  "TF_MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING",
  "TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
  "TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
@@ -140,6 +143,7 @@
  "TFAutoModelForNextSentencePrediction",
  "TFAutoModelForPreTraining",
  "TFAutoModelForDocumentQuestionAnswering",
+ "TFAutoModelForDocumentTokenClassification",
  "TFAutoModelForQuestionAnswering",
  "TFAutoModelForSemanticSegmentation",
  "TFAutoModelForSeq2SeqLM",
@@ -208,6 +212,7 @@
  MODEL_FOR_CTC_MAPPING,
  MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
  MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
+ MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
  MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
  MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
  MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
@@ -240,6 +245,7 @@
  AutoModelForCTC,
  AutoModelForDepthEstimation,
  AutoModelForDocumentQuestionAnswering,
+ AutoModelForDocumentTokenClassification,
  AutoModelForImageClassification,
  AutoModelForImageSegmentation,
  AutoModelForInstanceSegmentation,
@@ -273,6 +279,7 @@
  from .modeling_tf_auto import (
  TF_MODEL_FOR_CAUSAL_LM_MAPPING,
  TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
+ TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING,
  TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
  TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
  TF_MODEL_FOR_MASKED_LM_MAPPING,
@@ -292,6 +299,7 @@
  TFAutoModel,
  TFAutoModelForCausalLM,
  TFAutoModelForDocumentQuestionAnswering,
+ TFAutoModelForDocumentTokenClassification,
  TFAutoModelForImageClassification,
  TFAutoModelForMaskedLM,
  TFAutoModelForMultipleChoice,

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
@@ -716,6 +716,12 @@
  ]
 )
 
+MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
+ [
+ ("layoutlmv3", "LayoutLMv3ForTokenClassification"),
+ ]
+)
+
 MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
  [
  # Model for Token Classification mapping
@@ -926,6 +932,9 @@
 MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
  CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
 )
+MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
+ CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
+)
 MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
 MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
  CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
@@ -1060,6 +1069,18 @@ class AutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
  checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3',
 )
 
+
+class AutoModelForDocumentTokenClassification(_BaseAutoModelClass):
+ _model_mapping = MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING
+
+
+# NOTE(review): the embedded `", revision="` mirrors the document-QA entry above; presumably it injects a pinned revision into the generated example - confirm.
+AutoModelForDocumentTokenClassification = auto_class_update(
+ AutoModelForDocumentTokenClassification,
+ head_doc="document token classification",
+ checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
+)
+
 
 class AutoModelForTokenClassification(_BaseAutoModelClass):
  _model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING

diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py
@@ -344,6 +344,11 @@
  ]
 )
 
+TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
+ [
+ ("layoutlmv3", "TFLayoutLMv3ForTokenClassification"),
+ ]
+)
 
 TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict(
  [
@@ -442,6 +447,9 @@
 TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
  CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
 )
+TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING = _LazyAutoMapping(
+ CONFIG_MAPPING_NAMES, TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING_NAMES
+)
 TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
  CONFIG_MAPPING_NAMES, TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES
 )
@@ -561,6 +569,18 @@ class TFAutoModelForDocumentQuestionAnswering(_BaseAutoModelClass):
  checkpoint_for_example='impira/layoutlm-document-qa", revision="52e01b3',
 )
 
+
+class TFAutoModelForDocumentTokenClassification(_BaseAutoModelClass):
+ _model_mapping = TF_MODEL_FOR_DOCUMENT_TOKEN_CLASSIFICATION_MAPPING
+
+
+# NOTE(review): the embedded `", revision="` mirrors the document-QA entry above; presumably it injects a pinned revision into the generated example - confirm.
+TFAutoModelForDocumentTokenClassification = auto_class_update(
+ TFAutoModelForDocumentTokenClassification,
+ head_doc="document token classification",
+ checkpoint_for_example='microsoft/layoutlmv3-base", revision="07c9b08',
+)
+
 
 class TFAutoModelForTableQuestionAnswering(_BaseAutoModelClass):
  _model_mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING

diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
@@ -62,6 +62,7 @@
 from .conversational import Conversation, ConversationalPipeline
 from .depth_estimation import DepthEstimationPipeline
 from .document_question_answering import DocumentQuestionAnsweringPipeline
+from .document_token_classification import DocumentTokenClassificationPipeline
 from .feature_extraction import FeatureExtractionPipeline
 from .fill_mask import FillMaskPipeline
 from .image_classification import ImageClassificationPipeline
@@ -123,6 +124,7 @@
  AutoModelForCausalLM,
  AutoModelForCTC,
  AutoModelForDocumentQuestionAnswering,
+ AutoModelForDocumentTokenClassification,
  AutoModelForImageClassification,
  AutoModelForImageSegmentation,
  AutoModelForMaskedLM,
@@ -240,6 +242,18 @@
  },
  "type": "multimodal",
  },
+ "document-token-classification": {
+ "impl": DocumentTokenClassificationPipeline,
+ "pt": (AutoModelForDocumentTokenClassification,) if is_torch_available() else (),
+ "tf": (),
+ # Only a PyTorch default is listed because no TF model class is registered above.
+ "default": {
+ "model": {
+ "pt": ("microsoft/layoutlmv3-base", "07c9b08"),
+ },
+ },
+ "type": "multimodal",
+ },
  "fill-mask": {
  "impl": FillMaskPipeline,
  "tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),