From 9fb42f3bb9d7d9ec92624f6829c9eb5129ea67a0 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Sun, 18 Sep 2022 19:09:12 +0200 Subject: [PATCH 1/9] Add first draft --- docs/source/en/model_doc/layoutlmv2.mdx | 4 + src/transformers/__init__.py | 2 + .../models/layoutlmv2/__init__.py | 2 + .../models/layoutlmv2/modeling_layoutlmv2.py | 247 ++++++++++++++++++ src/transformers/utils/dummy_pt_objects.py | 7 + .../layoutlmv2/test_modeling_layoutlmv2.py | 2 + 6 files changed, 264 insertions(+) diff --git a/docs/source/en/model_doc/layoutlmv2.mdx b/docs/source/en/model_doc/layoutlmv2.mdx index e40a3cfc8d8a6b..b0ea6139130c82 100644 --- a/docs/source/en/model_doc/layoutlmv2.mdx +++ b/docs/source/en/model_doc/layoutlmv2.mdx @@ -307,3 +307,7 @@ print(encoding.keys()) ## LayoutLMv2ForQuestionAnswering [[autodoc]] LayoutLMv2ForQuestionAnswering + +## LayoutLMv2ForRelationExtraction + +[[autodoc]] LayoutLMv2ForRelationExtraction \ No newline at end of file diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 3c3a3a50064162..49dd9296b57860 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1379,6 +1379,7 @@ "LayoutLMv2ForTokenClassification", "LayoutLMv2Model", "LayoutLMv2PreTrainedModel", + "LayoutLMv2ForRelationExtraction", ] ) _import_structure["models.layoutlmv3"].extend( @@ -4080,6 +4081,7 @@ from .models.layoutlmv2 import ( LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST, LayoutLMv2ForQuestionAnswering, + LayoutLMv2ForRelationExtraction, LayoutLMv2ForSequenceClassification, LayoutLMv2ForTokenClassification, LayoutLMv2Model, diff --git a/src/transformers/models/layoutlmv2/__init__.py b/src/transformers/models/layoutlmv2/__init__.py index beaacb815843d0..802b1875920a2f 100644 --- a/src/transformers/models/layoutlmv2/__init__.py +++ b/src/transformers/models/layoutlmv2/__init__.py @@ -63,6 +63,7 @@ "LayoutLMv2Layer", "LayoutLMv2Model", "LayoutLMv2PreTrainedModel", + "LayoutLMv2ForRelationExtraction", ] if TYPE_CHECKING: @@ -95,6 +96,7 @@ from .modeling_layoutlmv2 import ( LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST, LayoutLMv2ForQuestionAnswering, + LayoutLMv2ForRelationExtraction, LayoutLMv2ForSequenceClassification, LayoutLMv2ForTokenClassification, LayoutLMv2Layer, diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index be31af99d6dfd8..bcfbb3f98ce4f9 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -15,6 +15,8 @@ """ PyTorch LayoutLMv2 model.""" import math +from copy import copy +from dataclasses import dataclass from typing import Optional, Tuple, Union import torch @@ -33,6 +35,7 @@ from ...modeling_utils import PreTrainedModel from ...pytorch_utils import apply_chunking_to_forward, torch_int_div from ...utils import ( + ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, is_detectron2_available, @@ -61,6 +64,32 @@ ] +@dataclass +class RelationExtractionOutput(ModelOutput): + """ + Class for outputs of [`LayoutLMv2ForRelationExtraction`]. + + Args: + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): + Classification (or regression if config.num_labels==1) loss. + logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`): + Classification (or regression if config.num_labels==1) scores (before SoftMax). 
+ hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of + the model at the output of each layer plus the optional initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in + the self-attention heads. + """ + + loss: Optional[torch.FloatTensor] = None + logits: torch.FloatTensor = None + hidden_states: Optional[Tuple[torch.FloatTensor]] = None + attentions: Optional[Tuple[torch.FloatTensor]] = None + + class LayoutLMv2Embeddings(nn.Module): """Construct the embeddings from word, position and token_type embeddings.""" @@ -1424,3 +1453,221 @@ def forward( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) + + +class BiaffineAttention(torch.nn.Module): + """Implements a biaffine attention operator for binary relation classification. + Args: + PyTorch implementation of the biaffine attention operator from "End-to-end neural relation extraction using deep + biaffine attention" (https://arxiv.org/abs/1812.11275) which can be used as a classifier for binary relation + classification. + in_features (int): The size of the feature dimension of the inputs. out_features (int): The size of the feature + dimension of the output. + Shape: + - x_1: `(N, *, in_features)` where `N` is the batch dimension and `*` means any number of additional + dimensisons. + - x_2: `(N, *, in_features)`, where `N` is the batch dimension and `*` means any number of additional + dimensions. + - Output: `(N, *, out_features)`, where `N` is the batch dimension and `*` means any number + of additional dimensions. 
+ Examples: + >>> batch_size, in_features, out_features = 32, 100, 4 >>> biaffine_attention = BiaffineAttention(in_features, + out_features) >>> x_1 = torch.randn(batch_size, in_features) >>> x_2 = torch.randn(batch_size, in_features) >>> + output = biaffine_attention(x_1, x_2) >>> print(output.size()) torch.Size([32, 4]) + """ + + def __init__(self, in_features, out_features): + super(BiaffineAttention, self).__init__() + + self.in_features = in_features + self.out_features = out_features + + self.bilinear = torch.nn.Bilinear(in_features, in_features, out_features, bias=False) + self.linear = torch.nn.Linear(2 * in_features, out_features, bias=True) + + self.reset_parameters() + + def forward(self, x_1, x_2): + return self.bilinear(x_1, x_2) + self.linear(torch.cat((x_1, x_2), dim=-1)) + + def reset_parameters(self): + self.bilinear.reset_parameters() + self.linear.reset_parameters() + + +class LayoutLMv2RelationExtractionDecoder(nn.Module): + def __init__(self, config): + super().__init__() + self.entity_emb = nn.Embedding(3, config.hidden_size, scale_grad_by_freq=True) + projection = nn.Sequential( + nn.Linear(config.hidden_size * 2, config.hidden_size), + nn.ReLU(), + nn.Dropout(config.hidden_dropout_prob), + nn.Linear(config.hidden_size, config.hidden_size // 2), + nn.ReLU(), + nn.Dropout(config.hidden_dropout_prob), + ) + self.ffnn_head = copy.deepcopy(projection) + self.ffnn_tail = copy.deepcopy(projection) + self.rel_classifier = BiaffineAttention(config.hidden_size // 2, 2) + self.loss_fct = CrossEntropyLoss() + + def build_relation(self, relations, entities): + batch_size = len(relations) + new_relations = [] + for b in range(batch_size): + if len(entities[b]["start"]) <= 2: + entities[b] = {"end": [1, 1], "label": [0, 0], "start": [0, 0]} + all_possible_relations = set( + [ + (i, j) + for i in range(len(entities[b]["label"])) + for j in range(len(entities[b]["label"])) + if entities[b]["label"][i] == 1 and entities[b]["label"][j] == 2 + ] + ) + if len(all_possible_relations) == 0: + all_possible_relations = set([(0, 1)]) + positive_relations = set(list(zip(relations[b]["head"], relations[b]["tail"]))) + negative_relations = all_possible_relations - positive_relations + positive_relations = set([i for i in positive_relations if i in all_possible_relations]) + reordered_relations = list(positive_relations) + list(negative_relations) + relation_per_doc = {"head": [], "tail": [], "label": []} + relation_per_doc["head"] = [i[0] for i in reordered_relations] + relation_per_doc["tail"] = [i[1] for i in reordered_relations] + relation_per_doc["label"] = [1] * len(positive_relations) + [0] * ( + len(reordered_relations) - len(positive_relations) + ) + assert len(relation_per_doc["head"]) != 0 + new_relations.append(relation_per_doc) + return new_relations, entities + + def get_predicted_relations(self, logits, relations, entities): + pred_relations = [] + for i, pred_label in enumerate(logits.argmax(-1)): + if pred_label != 1: + continue + rel = {} + rel["head_id"] = relations["head"][i] + rel["head"] = (entities["start"][rel["head_id"]], entities["end"][rel["head_id"]]) + rel["head_type"] = entities["label"][rel["head_id"]] + + rel["tail_id"] = relations["tail"][i] + rel["tail"] = (entities["start"][rel["tail_id"]], entities["end"][rel["tail_id"]]) + rel["tail_type"] = entities["label"][rel["tail_id"]] + rel["type"] = 1 + pred_relations.append(rel) + return pred_relations + + def forward(self, hidden_states, entities, relations): + batch_size, max_n_words, context_dim = 
hidden_states.size() + device = hidden_states.device + relations, entities = self.build_relation(relations, entities) + loss = 0 + all_pred_relations = [] + for b in range(batch_size): + head_entities = torch.tensor(relations[b]["head"], device=device) + tail_entities = torch.tensor(relations[b]["tail"], device=device) + relation_labels = torch.tensor(relations[b]["label"], device=device) + entities_start_index = torch.tensor(entities[b]["start"], device=device) + entities_labels = torch.tensor(entities[b]["label"], device=device) + head_index = entities_start_index[head_entities] + head_label = entities_labels[head_entities] + head_label_repr = self.entity_emb(head_label) + + tail_index = entities_start_index[tail_entities] + tail_label = entities_labels[tail_entities] + tail_label_repr = self.entity_emb(tail_label) + + head_repr = torch.cat( + (hidden_states[b][head_index], head_label_repr), + dim=-1, + ) + tail_repr = torch.cat( + (hidden_states[b][tail_index], tail_label_repr), + dim=-1, + ) + heads = self.ffnn_head(head_repr) + tails = self.ffnn_tail(tail_repr) + logits = self.rel_classifier(heads, tails) + loss += self.loss_fct(logits, relation_labels) + pred_relations = self.get_predicted_relations(logits, relations[b], entities[b]) + all_pred_relations.append(pred_relations) + return loss, all_pred_relations + + +@add_start_docstrings( + """ + LayoutLMv2 Model with a relation extraction head on top for key-value extraction tasks such as + [XFUND](https://github.com/doc-analysis/XFUND) (a bi-affine attention layer on top). + """, + LAYOUTLMV2_START_DOCSTRING, +) +class LayoutLMv2ForRelationExtraction(LayoutLMv2PreTrainedModel): + def __init__(self, config): + super().__init__(config) + + self.layoutlmv2 = LayoutLMv2Model(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.extractor = LayoutLMv2RelationExtractionDecoder(config) + + # Initialize weights and apply final processing + self.post_init() + + @add_start_docstrings_to_model_forward(LAYOUTLMV2_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @replace_return_docstrings(output_type=RelationExtractionOutput, config_class=_CONFIG_FOR_DOC) + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + bbox: Optional[torch.LongTensor] = None, + image: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.FloatTensor] = None, + token_type_ids: Optional[torch.LongTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + start_positions: Optional[torch.LongTensor] = None, + end_positions: Optional[torch.LongTensor] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + entities=None, + relations=None, + ): + r""" + entities (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + ... + relations (...): + ... 
+ + Returns: + + Example: + + ```python + >>> from transformers import LayoutLMv2Processor, LayoutLMv2ForRelationExtraction + >>> from PIL import Image + >>> from datasets import load_dataset + ``` + """ + outputs = self.layoutlmv2( + input_ids=input_ids, + bbox=bbox, + image=image, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + ) + + seq_length = input_ids.size(1) + sequence_output = outputs[0][:, :seq_length] + sequence_output = self.dropout(sequence_output) + loss, pred_relations = self.extractor(sequence_output, entities, relations) + + return RelationExtractionOutput( + loss=loss, + entities=entities, + relations=relations, + pred_relations=pred_relations, + hidden_states=outputs[0], + ) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index b656cee9c89bdc..dccceb462ff8b7 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -2628,6 +2628,13 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) +class LayoutLMv2ForRelationExtraction(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class LayoutLMv2ForSequenceClassification(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index 3c38373163e496..178feacda68340 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -34,6 +34,7 @@ MODEL_MAPPING, LayoutLMv2Config, LayoutLMv2ForQuestionAnswering, + LayoutLMv2ForRelationExtraction, LayoutLMv2ForSequenceClassification, LayoutLMv2ForTokenClassification, LayoutLMv2Model, @@ -269,6 +270,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, unittest.TestCase): LayoutLMv2ForSequenceClassification, LayoutLMv2ForTokenClassification, LayoutLMv2ForQuestionAnswering, + LayoutLMv2ForRelationExtraction, ) if is_torch_available() else () From bfa9fbeb3ce18bd6dacaca11230c8cfd35131a06 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Sun, 18 Sep 2022 19:33:33 +0200 Subject: [PATCH 2/9] Fix bug --- src/transformers/models/layoutlmv2/modeling_layoutlmv2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index bcfbb3f98ce4f9..276835b6fd56da 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -14,8 +14,8 @@ # limitations under the License. 
""" PyTorch LayoutLMv2 model.""" +import copy import math -from copy import copy from dataclasses import dataclass from typing import Optional, Tuple, Union From 36c41a1017eb6ed321e480d944095ae01a68e1a3 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Sun, 18 Sep 2022 21:36:56 +0200 Subject: [PATCH 3/9] Fix output --- .../models/layoutlmv2/modeling_layoutlmv2.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index 276835b6fd56da..1034813d36dff8 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -72,8 +72,12 @@ class RelationExtractionOutput(ModelOutput): Args: loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): Classification (or regression if config.num_labels==1) loss. - logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`): - Classification (or regression if config.num_labels==1) scores (before SoftMax). + entities (...) + ... + relations (...) + ... + pred_relations (...) + ... hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of @@ -85,7 +89,9 @@ class RelationExtractionOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + entities: dict = None + relations: dict = None + pred_relations: dict = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1669,5 +1675,6 @@ def forward( entities=entities, relations=relations, pred_relations=pred_relations, - hidden_states=outputs[0], + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, ) From 0c05911e4c0b65903b498012edc7f6d00182fabe Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Mon, 19 Sep 2022 13:35:42 +0200 Subject: [PATCH 4/9] Add return_dict option --- .../models/layoutlmv2/modeling_layoutlmv2.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index 1034813d36dff8..b0e6dc5fa046bb 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -1632,12 +1632,11 @@ def forward( position_ids: Optional[torch.LongTensor] = None, head_mask: Optional[torch.FloatTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, - start_positions: Optional[torch.LongTensor] = None, - end_positions: Optional[torch.LongTensor] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, entities=None, relations=None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, ): r""" entities (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1655,6 +1654,9 @@ def forward( >>> from datasets import load_dataset ``` """ + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + outputs = self.layoutlmv2( input_ids=input_ids, 
bbox=bbox, @@ -1663,12 +1665,20 @@ def forward( token_type_ids=token_type_ids, position_ids=position_ids, head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, ) seq_length = input_ids.size(1) - sequence_output = outputs[0][:, :seq_length] - sequence_output = self.dropout(sequence_output) - loss, pred_relations = self.extractor(sequence_output, entities, relations) + text_output = outputs[0][:, :seq_length] + text_output = self.dropout(text_output) + loss, pred_relations = self.extractor(text_output, entities, relations) + + if not return_dict: + output = (pred_relations,) + outputs[2:] + return ((loss,) + output) if loss is not None else output return RelationExtractionOutput( loss=loss, From 219a4f72a667cdb25bb342f0b4e96cf537155ca0 Mon Sep 17 00:00:00 2001 From: NielsRogge Date: Mon, 19 Sep 2022 20:01:21 +0000 Subject: [PATCH 5/9] Add test --- tests/models/layoutlmv2/test.py | 38 +++++++ .../layoutlmv2/test_modeling_layoutlmv2.py | 100 +++++++++++++++++- 2 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 tests/models/layoutlmv2/test.py diff --git a/tests/models/layoutlmv2/test.py b/tests/models/layoutlmv2/test.py new file mode 100644 index 00000000000000..cb5ba15507e25a --- /dev/null +++ b/tests/models/layoutlmv2/test.py @@ -0,0 +1,38 @@ +# for example in range(self.batch_size): + # # sample a number of entities for the example + # num_entities = random.randint(1, self.max_entities) + # entity_starts = [] + # entity_ends = [] + # entity_labels = [] + # for entity in range(num_entities): + # entity_start = random.randint(0, self.seq_length) + # entity_end = entity_start + random.randint(1, self.max_entity_length) + # entity_label = random.randint(0, self.num_labels) + # entity_starts.append(entity_start) + # entity_ends.append(entity_end) + # entity_labels.append(entity_label) + # entity_dict = { + # "start": entity_starts, + # "end": entity_ends, + # "label": entity_labels, + # } + # entities.append(entity_dict) + + # # sample a number of relations for the example + # num_relations = random.randint(1, self.max_relations) + # start_indices = [] + # end_indices = [] + # heads = [] + # tails = [] + # for relation in range(num_relations): + # start_index = random.randint(0, self.seq_length) + # end_index = start_index + random.randint(1, self.max_entity_length) + # head = random.randint(0, self.max_entities) + # tail = random.randint(0, self.max_entities) + # relation_dict = { + # "start_index": start_indices, + # "end_index": end_indices, + # "head": heads, + # "tail": tails, + # } + # relations.append(relation_dict) \ No newline at end of file diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index 178feacda68340..d3aa6bc7d62c75 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -137,9 +137,14 @@ def prepare_config_and_inputs(self): sequence_labels = None token_labels = None + entities = None + relations = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + # we choose some random entities and relations + entities = [{"start": [0, 4], "end": [3, 6], "label": [2,1]} for _ in range(self.batch_size)] + relations = [{"start_index": [0], "end_index": [5], "head": [0], "tail": [1]} for _ in 
range(self.batch_size)] config = LayoutLMv2Config( vocab_size=self.vocab_size, @@ -164,10 +169,31 @@ def prepare_config_and_inputs(self): config.detectron2_config_args["MODEL.RESNETS.RES2_OUT_CHANNELS"] = 64 config.detectron2_config_args["MODEL.RESNETS.NUM_GROUPS"] = 1 - return config, input_ids, bbox, image, token_type_ids, input_mask, sequence_labels, token_labels + return ( + config, + input_ids, + bbox, + image, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + entities, + relations, + ) def create_and_check_model( - self, config, input_ids, bbox, image, token_type_ids, input_mask, sequence_labels, token_labels + self, + config, + input_ids, + bbox, + image, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + entities, + relations, ): model = LayoutLMv2Model(config=config) model.to(torch_device) @@ -183,7 +209,17 @@ def create_and_check_model( self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) def create_and_check_for_sequence_classification( - self, config, input_ids, bbox, image, token_type_ids, input_mask, sequence_labels, token_labels + self, + config, + input_ids, + bbox, + image, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + entities, + relations, ): config.num_labels = self.num_labels model = LayoutLMv2ForSequenceClassification(config) @@ -200,7 +236,17 @@ def create_and_check_for_sequence_classification( self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) def create_and_check_for_token_classification( - self, config, input_ids, bbox, image, token_type_ids, input_mask, sequence_labels, token_labels + self, + config, + input_ids, + bbox, + image, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + entities, + relations, ): config.num_labels = self.num_labels model = LayoutLMv2ForTokenClassification(config=config) @@ -217,7 +263,17 @@ def create_and_check_for_token_classification( self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) def create_and_check_for_question_answering( - self, config, input_ids, bbox, image, token_type_ids, input_mask, sequence_labels, token_labels + self, + config, + input_ids, + bbox, + image, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + entities, + relations, ): model = LayoutLMv2ForQuestionAnswering(config=config) model.to(torch_device) @@ -234,6 +290,36 @@ def create_and_check_for_question_answering( self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + def create_and_check_for_relation_extraction( + self, + config, + input_ids, + bbox, + image, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + entities, + relations, + ): + model = LayoutLMv2ForRelationExtraction(config=config) + torch_device = "cpu" + model.to(torch_device) + model.eval() + result = model( + input_ids.to("cpu"), + bbox=bbox.to("cpu"), + image=image.to("cpu"), + attention_mask=input_mask.to("cpu"), + token_type_ids=token_type_ids.to("cpu"), + entities=entities, + relations=relations, + ) + print(result.pred_relations) + # self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) + # self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() ( @@ -315,6 +401,10 @@ def 
test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering(*config_and_inputs) + def test_for_relation_extraction(self): + config_and_inputs = self.model_tester.prepare_config_and_inputs() + self.model_tester.create_and_check_for_relation_extraction(*config_and_inputs) + def test_save_load_fast_init_from_base(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() base_class = MODEL_MAPPING[config.__class__] From 4c5c3dc5df1b9d9f66ebb4c16ac493b2a02725b4 Mon Sep 17 00:00:00 2001 From: NielsRogge Date: Tue, 20 Sep 2022 07:40:53 +0000 Subject: [PATCH 6/9] Make most tests pass --- .../models/layoutlmv2/modeling_layoutlmv2.py | 30 ++-- .../layoutlmv2/test_modeling_layoutlmv2.py | 149 ++++++++++++++++-- tests/test_modeling_common.py | 7 - 3 files changed, 151 insertions(+), 35 deletions(-) diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index b0e6dc5fa046bb..b24a78e19ddf74 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -89,9 +89,9 @@ class RelationExtractionOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None + pred_relations: dict = None entities: dict = None relations: dict = None - pred_relations: dict = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -546,7 +546,7 @@ class LayoutLMv2PreTrainedModel(PreTrainedModel): def _init_weights(self, module): """Initialize the weights""" - if isinstance(module, nn.Linear): + if isinstance(module, (nn.Linear, nn.Bilinear)): # Slightly different from the TF version which uses truncated_normal for initialization # cf https://github.com/pytorch/pytorch/pull/5617 module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) @@ -1491,15 +1491,9 @@ def __init__(self, in_features, out_features): self.bilinear = torch.nn.Bilinear(in_features, in_features, out_features, bias=False) self.linear = torch.nn.Linear(2 * in_features, out_features, bias=True) - self.reset_parameters() - def forward(self, x_1, x_2): return self.bilinear(x_1, x_2) + self.linear(torch.cat((x_1, x_2), dim=-1)) - def reset_parameters(self): - self.bilinear.reset_parameters() - self.linear.reset_parameters() - class LayoutLMv2RelationExtractionDecoder(nn.Module): def __init__(self, config): @@ -1632,14 +1626,14 @@ def forward( position_ids: Optional[torch.LongTensor] = None, head_mask: Optional[torch.FloatTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, - entities=None, - relations=None, + entities: Optional[dict] = None, + relations: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ): r""" - entities (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + entities (...): ... relations (...): ... 
@@ -1671,20 +1665,24 @@ def forward( return_dict=return_dict, ) - seq_length = input_ids.size(1) + seq_length = input_ids.size(1) if input_ids is not None else inputs_embeds.size(1) text_output = outputs[0][:, :seq_length] text_output = self.dropout(text_output) - loss, pred_relations = self.extractor(text_output, entities, relations) + + loss = None + pred_relations = None + if entities is not None and relations is not None: + loss, pred_relations = self.extractor(text_output, entities, relations) if not return_dict: - output = (pred_relations,) + outputs[2:] - return ((loss,) + output) if loss is not None else output + output = (entities, relations) + outputs[2:] + return ((loss, pred_relations) + output) if loss is not None else output return RelationExtractionOutput( loss=loss, + pred_relations=pred_relations, entities=entities, relations=relations, - pred_relations=pred_relations, hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index d3aa6bc7d62c75..ebb06f67319a5a 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -15,11 +15,13 @@ """ Testing suite for the PyTorch LayoutLMv2 model. """ +import copy import os import random import tempfile import unittest +from transformers.models.auto import get_values from transformers.testing_utils import require_detectron2, require_torch, require_torch_multi_gpu, slow, torch_device from transformers.utils import is_detectron2_available, is_torch_available @@ -31,6 +33,10 @@ import torch from transformers import ( + MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, + MODEL_FOR_QUESTION_ANSWERING_MAPPING, + MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, + MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_MAPPING, LayoutLMv2Config, LayoutLMv2ForQuestionAnswering, @@ -143,8 +149,10 @@ def prepare_config_and_inputs(self): sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) # we choose some random entities and relations - entities = [{"start": [0, 4], "end": [3, 6], "label": [2,1]} for _ in range(self.batch_size)] - relations = [{"start_index": [0], "end_index": [5], "head": [0], "tail": [1]} for _ in range(self.batch_size)] + entities = [{"start": [0, 4], "end": [3, 6], "label": [2, 1]} for _ in range(self.batch_size)] + relations = [ + {"start_index": [0], "end_index": [5], "head": [0], "tail": [1]} for _ in range(self.batch_size) + ] config = LayoutLMv2Config( vocab_size=self.vocab_size, @@ -304,21 +312,18 @@ def create_and_check_for_relation_extraction( relations, ): model = LayoutLMv2ForRelationExtraction(config=config) - torch_device = "cpu" model.to(torch_device) model.eval() result = model( - input_ids.to("cpu"), - bbox=bbox.to("cpu"), - image=image.to("cpu"), - attention_mask=input_mask.to("cpu"), - token_type_ids=token_type_ids.to("cpu"), + input_ids, + bbox=bbox, + image=image, + attention_mask=input_mask, + token_type_ids=token_type_ids, entities=entities, relations=relations, ) - print(result.pred_relations) - # self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - # self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + self.parent.assertTrue(result.pred_relations) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() @@ 
-331,6 +336,8 @@ def prepare_config_and_inputs_for_common(self): input_mask, sequence_labels, token_labels, + entities, + relations, ) = config_and_inputs inputs_dict = { "input_ids": input_ids, @@ -362,6 +369,42 @@ class LayoutLMv2ModelTest(ModelTesterMixin, unittest.TestCase): else () ) + def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): + inputs_dict = copy.deepcopy(inputs_dict) + + if model_class.__name__ == "LayoutLMv2ForRelationExtraction": + # we choose some random entities and relations + entities = [{"start": [0, 4], "end": [3, 6], "label": [2, 1]} for _ in range(self.model_tester.batch_size)] + relations = [ + {"start_index": [0], "end_index": [5], "head": [0], "tail": [1]} + for _ in range(self.model_tester.batch_size) + ] + inputs_dict["entities"] = entities + inputs_dict["relations"] = relations + + if return_labels: + if model_class in [ + *get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING), + *get_values(MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING), + ]: + + inputs_dict["start_positions"] = torch.zeros( + self.model_tester.batch_size, dtype=torch.long, device=torch_device + ) + inputs_dict["end_positions"] = torch.zeros( + self.model_tester.batch_size, dtype=torch.long, device=torch_device + ) + elif model_class in get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING): + inputs_dict["labels"] = torch.zeros( + self.model_tester.batch_size, dtype=torch.long, device=torch_device + ) + elif model_class in get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING): + inputs_dict["labels"] = torch.zeros( + (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device + ) + + return inputs_dict + def setUp(self): self.model_tester = LayoutLMv2ModelTester(self) self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37) @@ -404,7 +447,7 @@ def test_for_question_answering(self): def test_for_relation_extraction(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_relation_extraction(*config_and_inputs) - + def test_save_load_fast_init_from_base(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() base_class = MODEL_MAPPING[config.__class__] @@ -577,6 +620,88 @@ def test_initialization(self): msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) + def _create_and_check_torchscript(self, config, inputs_dict): + if not self.test_torchscript: + return + + configs_no_init = _config_zero_init(config) # To be sure we have no Nan + configs_no_init.torchscript = True + for model_class in self.all_model_classes: + if model_class.__name__ == "LayoutLMv2ForRelationExtraction": + continue + + model = model_class(config=configs_no_init) + model.to(torch_device) + model.eval() + inputs = self._prepare_for_class(inputs_dict, model_class) + + try: + input_ids = inputs["input_ids"] + bbox = inputs["bbox"] + image = inputs["image"].tensor + traced_model = torch.jit.trace( + model, (input_ids, bbox, image), check_trace=False + ) # when traced model is checked, an error is produced due to name mangling + except RuntimeError: + self.fail("Couldn't trace module.") + + with tempfile.TemporaryDirectory() as tmp_dir_name: + pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt") + + try: + torch.jit.save(traced_model, pt_file_name) + except Exception: + self.fail("Couldn't save module.") + + try: + loaded_model = torch.jit.load(pt_file_name) + except Exception: + self.fail("Couldn't load module.") + + 
model.to(torch_device) + model.eval() + + loaded_model.to(torch_device) + loaded_model.eval() + + model_state_dict = model.state_dict() + loaded_model_state_dict = loaded_model.state_dict() + + non_persistent_buffers = {} + for key in loaded_model_state_dict.keys(): + if key not in model_state_dict.keys(): + non_persistent_buffers[key] = loaded_model_state_dict[key] + + loaded_model_state_dict = { + key: value for key, value in loaded_model_state_dict.items() if key not in non_persistent_buffers + } + + self.assertEqual(set(model_state_dict.keys()), set(loaded_model_state_dict.keys())) + + model_buffers = list(model.buffers()) + for non_persistent_buffer in non_persistent_buffers.values(): + found_buffer = False + for i, model_buffer in enumerate(model_buffers): + if torch.equal(non_persistent_buffer, model_buffer): + found_buffer = True + break + + self.assertTrue(found_buffer) + model_buffers.pop(i) + + models_equal = True + for layer_name, p1 in model_state_dict.items(): + if layer_name in loaded_model_state_dict: + p2 = loaded_model_state_dict[layer_name] + if p1.data.ne(p2.data).sum() > 0: + models_equal = False + + self.assertTrue(models_equal) + + # Avoid memory leak. Without this, each call increase RAM usage by ~20MB. + # (Even with this call, there are still memory leak by ~0.04MB) + self.clear_torch_jit_class_registry() + def prepare_layoutlmv2_batch_inputs(): # Here we prepare a batch of 2 sequences to test a LayoutLMv2 forward pass on: diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 082f2a8a9057f9..78308ced25dce9 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -658,13 +658,6 @@ def _create_and_check_torchscript(self, config, inputs_dict): traced_model = torch.jit.trace( model, (main_input, attention_mask, decoder_input_ids, decoder_attention_mask) ) - elif "bbox" in inputs and "image" in inputs: # LayoutLMv2 requires additional inputs - input_ids = inputs["input_ids"] - bbox = inputs["bbox"] - image = inputs["image"].tensor - traced_model = torch.jit.trace( - model, (input_ids, bbox, image), check_trace=False - ) # when traced model is checked, an error is produced due to name mangling else: main_input = inputs[main_input_name] traced_model = torch.jit.trace(model, main_input) From cf7e2a878a5ae448b2d4ffa6566fcde7767e9415 Mon Sep 17 00:00:00 2001 From: NielsRogge Date: Tue, 20 Sep 2022 08:09:14 +0000 Subject: [PATCH 7/9] Make more tests pass --- .../layoutlmv2/test_modeling_layoutlmv2.py | 234 ++++++++++-------- 1 file changed, 133 insertions(+), 101 deletions(-) diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index ebb06f67319a5a..82bb61bd7276dc 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -58,7 +58,7 @@ def __init__( batch_size=2, num_channels=3, image_size=4, - seq_length=7, + text_seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, @@ -87,7 +87,7 @@ def __init__( self.batch_size = batch_size self.num_channels = num_channels self.image_size = image_size - self.seq_length = seq_length + self.text_seq_length = text_seq_length self.is_training = is_training self.use_input_mask = use_input_mask self.use_token_type_ids = use_token_type_ids @@ -112,10 +112,13 @@ def __init__( self.scope = scope self.range_bbox = range_bbox + # in LayoutLMv2, the seq length equals the number of text tokens + number of image tokens + self.seq_length = 
self.text_seq_length + self.image_feature_pool_shape[0] * self.image_feature_pool_shape[1] + def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + input_ids = ids_tensor([self.batch_size, self.text_seq_length], self.vocab_size) - bbox = ids_tensor([self.batch_size, self.seq_length, 4], self.range_bbox) + bbox = ids_tensor([self.batch_size, self.text_seq_length, 4], self.range_bbox) # Ensure that bbox is legal for i in range(bbox.shape[0]): for j in range(bbox.shape[1]): @@ -135,11 +138,11 @@ def prepare_config_and_inputs(self): input_mask = None if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) + input_mask = random_attention_mask([self.batch_size, self.text_seq_length]) token_type_ids = None if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + token_type_ids = ids_tensor([self.batch_size, self.text_seq_length], self.type_vocab_size) sequence_labels = None token_labels = None @@ -147,7 +150,7 @@ def prepare_config_and_inputs(self): relations = None if self.use_labels: sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + token_labels = ids_tensor([self.batch_size, self.text_seq_length], self.num_labels) # we choose some random entities and relations entities = [{"start": [0, 4], "end": [3, 6], "label": [2, 1]} for _ in range(self.batch_size)] relations = [ @@ -211,9 +214,7 @@ def create_and_check_model( result = model(input_ids, bbox=bbox, image=image, token_type_ids=token_type_ids) result = model(input_ids, bbox=bbox, image=image) - # LayoutLMv2 has a different expected sequence length, namely also visual tokens are added - expected_seq_len = self.seq_length + self.image_feature_pool_shape[0] * self.image_feature_pool_shape[1] - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, expected_seq_len, self.hidden_size)) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size)) def create_and_check_for_sequence_classification( @@ -268,7 +269,7 @@ def create_and_check_for_token_classification( token_type_ids=token_type_ids, labels=token_labels, ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.text_seq_length, self.num_labels)) def create_and_check_for_question_answering( self, @@ -295,8 +296,8 @@ def create_and_check_for_question_answering( start_positions=sequence_labels, end_positions=sequence_labels, ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.text_seq_length)) + self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.text_seq_length)) def create_and_check_for_relation_extraction( self, @@ -400,7 +401,7 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): ) elif model_class in get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING): inputs_dict["labels"] = torch.zeros( - (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device + 
(self.model_tester.batch_size, self.model_tester.text_seq_length), dtype=torch.long, device=torch_device ) return inputs_dict @@ -496,113 +497,79 @@ class CopyClass(model_class): max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item() self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - def test_attention_outputs(self): + def test_save_load_fast_init_to_base(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True + base_class = MODEL_MAPPING[config.__class__] - # LayoutLMv2 has a different expected sequence length - expected_seq_len = ( - self.model_tester.seq_length - + self.model_tester.image_feature_pool_shape[0] * self.model_tester.image_feature_pool_shape[1] - ) + if isinstance(base_class, tuple): + base_class = base_class[0] for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, expected_seq_len, expected_seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - if hasattr(self.model_tester, "num_hidden_states_types"): - added_hidden_states = self.model_tester.num_hidden_states_types - else: - added_hidden_states = 1 - self.assertEqual(out_len + added_hidden_states, len(outputs)) - - self_attentions = outputs.attentions - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, expected_seq_len, expected_seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) + if model_class == base_class: + continue - hidden_states = outputs.hidden_states + # make a copy of model class to not break future tests + # from https://stackoverflow.com/questions/9541025/how-to-copy-a-python-class + class CopyClass(base_class): + pass - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) + base_class_copy = CopyClass - # LayoutLMv2 has a different expected sequence length - expected_seq_len = ( - self.model_tester.seq_length - + 
self.model_tester.image_feature_pool_shape[0] * self.model_tester.image_feature_pool_shape[1] - ) + # make sure that all keys are expected for test + base_class_copy._keys_to_ignore_on_load_missing = [] - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [expected_seq_len, self.model_tester.hidden_size], - ) + # make init deterministic, but make sure that + # non-initialized weights throw errors nevertheless + base_class_copy._init_weights = self._mock_init_weights - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + model = model_class(config) + state_dict = model.state_dict() - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) + # this will often delete a single weight of a multi-weight module + # to test an edge case + random_key_to_del = random.choice(list(state_dict.keys())) + del state_dict[random_key_to_del] - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True + # check that certain keys didn't get saved with the model + with tempfile.TemporaryDirectory() as tmpdirname: + model.config.save_pretrained(tmpdirname) + torch.save(state_dict, os.path.join(tmpdirname, "pytorch_model.bin")) - check_hidden_states_output(inputs_dict, config, model_class) + model_fast_init = base_class_copy.from_pretrained(tmpdirname) + model_slow_init = base_class_copy.from_pretrained(tmpdirname, _fast_init=False) + for key in model_fast_init.state_dict().keys(): + if key == "layoutlmv2.visual_segment_embedding": + # we skip the visual segment embedding as it has a custom initialization scheme + continue + max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item() + self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") + @slow def test_model_from_pretrained(self): for model_name in LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = LayoutLMv2Model.from_pretrained(model_name) self.assertIsNotNone(model) + def test_training(self): + if not self.model_tester.is_training: + return + + for model_class in self.all_model_classes: + print("Model class:", model_class) + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.return_dict = True + + if model_class in get_values(MODEL_MAPPING): + continue + + model = model_class(config) + model.to(torch_device) + model.train() + inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) + loss = model(**inputs).loss + def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -620,6 +587,71 @@ def test_initialization(self): msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) + # we overwrite this as LayoutLMv2ForRelationExtraction is not supported + def test_headmasking(self): + if not self.test_head_masking: + return + + global_rng = random.Random() + + global_rng.seed(42) + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + global_rng.seed() + + inputs_dict["output_attentions"] = True + config.output_hidden_states = True + configs_no_init = _config_zero_init(config) # To be sure we have no Nan + for model_class in self.all_model_classes: + if model_class.__name__ == "LayoutLMv2ForRelationExtraction": + continue + + model = model_class(config=configs_no_init) + model.to(torch_device) + model.eval() + + # Prepare head_mask + # Set 
require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior) + head_mask = torch.ones( + self.model_tester.num_hidden_layers, + self.model_tester.num_attention_heads, + device=torch_device, + ) + head_mask[0, 0] = 0 + head_mask[-1, :-1] = 0 + head_mask.requires_grad_(requires_grad=True) + inputs = self._prepare_for_class(inputs_dict, model_class).copy() + inputs["head_mask"] = head_mask + outputs = model(**inputs, return_dict=True) + + # Test that we can get a gradient back for importance score computation + output = sum(t.sum() for t in outputs[0]) + output = output.sum() + output.backward() + multihead_outputs = head_mask.grad + + self.assertIsNotNone(multihead_outputs) + self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers) + + def check_attentions_validity(attentions): + # Remove Nan + for t in attentions: + self.assertLess( + torch.sum(torch.isnan(t)), t.numel() / 4 + ) # Check we don't have more than 25% nans (arbitrary) + attentions = [ + t.masked_fill(torch.isnan(t), 0.0) for t in attentions + ] # remove them (the test is less complete) + + self.assertAlmostEqual(attentions[0][..., 0, :, :].flatten().sum().item(), 0.0) + self.assertNotEqual(attentions[0][..., -1, :, :].flatten().sum().item(), 0.0) + if len(attentions) > 2: # encoder-decoder models have only 2 layers in each module + self.assertNotEqual(attentions[1][..., 0, :, :].flatten().sum().item(), 0.0) + self.assertAlmostEqual(attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0) + self.assertNotEqual(attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0) + + check_attentions_validity(outputs.attentions) + + # we overwrite this as LayoutLMv2 requires special inputs + LayoutLMv2ForRelationExtraction is not supported def _create_and_check_torchscript(self, config, inputs_dict): if not self.test_torchscript: return From 3a904ea2dcfe0fae258aa97c19c6046419ccbeb2 Mon Sep 17 00:00:00 2001 From: NielsRogge Date: Tue, 20 Sep 2022 08:16:12 +0000 Subject: [PATCH 8/9] Improve docstrign --- .../models/layoutlmv2/modeling_layoutlmv2.py | 24 ++++--- tests/models/layoutlmv2/test.py | 72 +++++++++---------- .../layoutlmv2/test_modeling_layoutlmv2.py | 26 ++----- 3 files changed, 54 insertions(+), 68 deletions(-) diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index b24a78e19ddf74..9fc957b808989b 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -1463,12 +1463,17 @@ def forward( class BiaffineAttention(torch.nn.Module): """Implements a biaffine attention operator for binary relation classification. - Args: + PyTorch implementation of the biaffine attention operator from "End-to-end neural relation extraction using deep biaffine attention" (https://arxiv.org/abs/1812.11275) which can be used as a classifier for binary relation classification. - in_features (int): The size of the feature dimension of the inputs. out_features (int): The size of the feature - dimension of the output. + + Args: + in_features (int): + The size of the feature dimension of the inputs. + out_features (int): + The size of the feature dimension of the output. + Shape: - x_1: `(N, *, in_features)` where `N` is the batch dimension and `*` means any number of additional dimensisons. @@ -1476,10 +1481,6 @@ class BiaffineAttention(torch.nn.Module): dimensions. 
- Output: `(N, *, out_features)`, where `N` is the batch dimension and `*` means any number of additional dimensions. - Examples: - >>> batch_size, in_features, out_features = 32, 100, 4 >>> biaffine_attention = BiaffineAttention(in_features, - out_features) >>> x_1 = torch.randn(batch_size, in_features) >>> x_2 = torch.randn(batch_size, in_features) >>> - output = biaffine_attention(x_1, x_2) >>> print(output.size()) torch.Size([32, 4]) """ def __init__(self, in_features, out_features): @@ -1669,10 +1670,11 @@ def forward( text_output = outputs[0][:, :seq_length] text_output = self.dropout(text_output) - loss = None - pred_relations = None - if entities is not None and relations is not None: - loss, pred_relations = self.extractor(text_output, entities, relations) + if entities is None or relations is None: + raise ValueError( + "You need to provide entities and relations. Instantiate relations with empty lists at inference time" + ) + loss, pred_relations = self.extractor(text_output, entities, relations) if not return_dict: output = (entities, relations) + outputs[2:] diff --git a/tests/models/layoutlmv2/test.py b/tests/models/layoutlmv2/test.py index cb5ba15507e25a..70ca480b9a633b 100644 --- a/tests/models/layoutlmv2/test.py +++ b/tests/models/layoutlmv2/test.py @@ -1,38 +1,38 @@ # for example in range(self.batch_size): - # # sample a number of entities for the example - # num_entities = random.randint(1, self.max_entities) - # entity_starts = [] - # entity_ends = [] - # entity_labels = [] - # for entity in range(num_entities): - # entity_start = random.randint(0, self.seq_length) - # entity_end = entity_start + random.randint(1, self.max_entity_length) - # entity_label = random.randint(0, self.num_labels) - # entity_starts.append(entity_start) - # entity_ends.append(entity_end) - # entity_labels.append(entity_label) - # entity_dict = { - # "start": entity_starts, - # "end": entity_ends, - # "label": entity_labels, - # } - # entities.append(entity_dict) +# # sample a number of entities for the example +# num_entities = random.randint(1, self.max_entities) +# entity_starts = [] +# entity_ends = [] +# entity_labels = [] +# for entity in range(num_entities): +# entity_start = random.randint(0, self.seq_length) +# entity_end = entity_start + random.randint(1, self.max_entity_length) +# entity_label = random.randint(0, self.num_labels) +# entity_starts.append(entity_start) +# entity_ends.append(entity_end) +# entity_labels.append(entity_label) +# entity_dict = { +# "start": entity_starts, +# "end": entity_ends, +# "label": entity_labels, +# } +# entities.append(entity_dict) - # # sample a number of relations for the example - # num_relations = random.randint(1, self.max_relations) - # start_indices = [] - # end_indices = [] - # heads = [] - # tails = [] - # for relation in range(num_relations): - # start_index = random.randint(0, self.seq_length) - # end_index = start_index + random.randint(1, self.max_entity_length) - # head = random.randint(0, self.max_entities) - # tail = random.randint(0, self.max_entities) - # relation_dict = { - # "start_index": start_indices, - # "end_index": end_indices, - # "head": heads, - # "tail": tails, - # } - # relations.append(relation_dict) \ No newline at end of file +# # sample a number of relations for the example +# num_relations = random.randint(1, self.max_relations) +# start_indices = [] +# end_indices = [] +# heads = [] +# tails = [] +# for relation in range(num_relations): +# start_index = random.randint(0, self.seq_length) +# end_index = 
start_index + random.randint(1, self.max_entity_length) +# head = random.randint(0, self.max_entities) +# tail = random.randint(0, self.max_entities) +# relation_dict = { +# "start_index": start_indices, +# "end_index": end_indices, +# "head": heads, +# "tail": tails, +# } +# relations.append(relation_dict) diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index 82bb61bd7276dc..294d4f7ce971dc 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -401,7 +401,9 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): ) elif model_class in get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING): inputs_dict["labels"] = torch.zeros( - (self.model_tester.batch_size, self.model_tester.text_seq_length), dtype=torch.long, device=torch_device + (self.model_tester.batch_size, self.model_tester.text_seq_length), + dtype=torch.long, + device=torch_device, ) return inputs_dict @@ -545,31 +547,13 @@ class CopyClass(base_class): continue max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item() self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") - + @slow def test_model_from_pretrained(self): for model_name in LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = LayoutLMv2Model.from_pretrained(model_name) self.assertIsNotNone(model) - def test_training(self): - if not self.model_tester.is_training: - return - - for model_class in self.all_model_classes: - print("Model class:", model_class) - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - if model_class in get_values(MODEL_MAPPING): - continue - - model = model_class(config) - model.to(torch_device) - model.train() - inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) - loss = model(**inputs).loss - def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -650,7 +634,7 @@ def check_attentions_validity(attentions): self.assertNotEqual(attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0) check_attentions_validity(outputs.attentions) - + # we overwrite this as LayoutLMv2 requires special inputs + LayoutLMv2ForRelationExtraction is not supported def _create_and_check_torchscript(self, config, inputs_dict): if not self.test_torchscript: From 1e844fa0c1d274a08e94f73739931284ddccacf3 Mon Sep 17 00:00:00 2001 From: NielsRogge Date: Tue, 20 Sep 2022 09:11:03 +0000 Subject: [PATCH 9/9] Make all tests pass --- .../models/layoutlmv2/modeling_layoutlmv2.py | 46 +++++++---- tests/models/layoutlmv2/test.py | 38 --------- .../layoutlmv2/test_modeling_layoutlmv2.py | 78 +++++++++++++++++++ 3 files changed, 111 insertions(+), 51 deletions(-) delete mode 100644 tests/models/layoutlmv2/test.py diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index 9fc957b808989b..32766d4724940b 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -70,14 +70,17 @@ class RelationExtractionOutput(ModelOutput): Class for outputs of [`LayoutLMv2ForRelationExtraction`]. Args: - loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): + loss (`torch.FloatTensor` of shape `(1,)`: Classification (or regression if config.num_labels==1) loss. 
diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
index 82bb61bd7276dc..294d4f7ce971dc 100644
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -401,7 +401,9 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
             )
         elif model_class in get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING):
             inputs_dict["labels"] = torch.zeros(
-                (self.model_tester.batch_size, self.model_tester.text_seq_length), dtype=torch.long, device=torch_device
+                (self.model_tester.batch_size, self.model_tester.text_seq_length),
+                dtype=torch.long,
+                device=torch_device,
             )
 
         return inputs_dict
@@ -545,31 +547,13 @@ class CopyClass(base_class):
                 continue
             max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item()
             self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
-    
+
     @slow
     def test_model_from_pretrained(self):
         for model_name in LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
             model = LayoutLMv2Model.from_pretrained(model_name)
             self.assertIsNotNone(model)
 
-    def test_training(self):
-        if not self.model_tester.is_training:
-            return
-
-        for model_class in self.all_model_classes:
-            print("Model class:", model_class)
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            config.return_dict = True
-
-            if model_class in get_values(MODEL_MAPPING):
-                continue
-
-            model = model_class(config)
-            model.to(torch_device)
-            model.train()
-            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
-            loss = model(**inputs).loss
-
     def test_initialization(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
@@ -650,7 +634,7 @@ def check_attentions_validity(attentions):
             self.assertNotEqual(attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
 
         check_attentions_validity(outputs.attentions)
-    
+
     # we overwrite this as LayoutLMv2 requires special inputs + LayoutLMv2ForRelationExtraction is not supported
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:

From 1e844fa0c1d274a08e94f73739931284ddccacf3 Mon Sep 17 00:00:00 2001
From: NielsRogge
Date: Tue, 20 Sep 2022 09:11:03 +0000
Subject: [PATCH 9/9] Make all tests pass

---
 .../models/layoutlmv2/modeling_layoutlmv2.py  | 46 +++++++----
 tests/models/layoutlmv2/test.py               | 38 ---------
 .../layoutlmv2/test_modeling_layoutlmv2.py    | 78 +++++++++++++++++++
 3 files changed, 111 insertions(+), 51 deletions(-)
 delete mode 100644 tests/models/layoutlmv2/test.py

diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
index 9fc957b808989b..32766d4724940b 100755
--- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
@@ -70,14 +70,17 @@ class RelationExtractionOutput(ModelOutput):
     Class for outputs of [`LayoutLMv2ForRelationExtraction`].
 
     Args:
-        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
+        loss (`torch.FloatTensor` of shape `(1,)`):
             Classification (or regression if config.num_labels==1) loss.
-        entities (...)
-            ...
-        relations (...)
-            ...
-        pred_relations (...)
-            ...
+        entities (`list[dict]`):
+            List of dictionaries (one per example in the batch). Each dictionary contains 3 keys: `start`, `end` and
+            `label`.
+        relations (`list[dict]`):
+            List of dictionaries (one per example in the batch). Each dictionary contains 4 keys: `start_index`,
+            `end_index`, `head` and `tail`.
+        pred_relations (`list[dict]`):
+            List of dictionaries (one per example in the batch). Each dictionary contains 7 keys: `head`, `head_id`,
+            `head_type`, `tail`, `tail_id`, `tail_type` and `type`.
         hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
             Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
             one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of
@@ -1634,10 +1637,12 @@ def forward(
         return_dict: Optional[bool] = None,
     ):
         r"""
-        entities (...):
-            ...
-        relations (...):
-            ...
+        entities (`list[dict]`):
+            List of dictionaries (one per example in the batch). Each dictionary contains 3 keys: `start`, `end` and
+            `label`.
+        relations (`list[dict]`):
+            List of dictionaries (one per example in the batch). Each dictionary contains 4 keys: `start_index`,
+            `end_index`, `head` and `tail`.
 
         Returns:
 
@@ -1647,6 +1652,21 @@
         >>> from transformers import LayoutLMv2Processor, LayoutLMv2ForRelationExtraction
         >>> from PIL import Image
         >>> from datasets import load_dataset
+
+        >>> processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
+        >>> model = LayoutLMv2ForRelationExtraction.from_pretrained("microsoft/layoutlmv2-base-uncased")
+
+        >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
+        >>> image_path = dataset["test"][0]["file"]
+        >>> image = Image.open(image_path).convert("RGB")
+        >>> encoding = processor(image, return_tensors="pt")
+
+        >>> # instantiate relations as empty at inference time
+        >>> encoding["entities"] = [{"start": [0, 4], "end": [3, 6], "label": [2, 1]}]
+        >>> encoding["relations"] = [{"start_index": [], "end_index": [], "head": [], "tail": []}]
+
+        >>> outputs = model(**encoding)
+        >>> predicted_relations = outputs.pred_relations[0]
         ```
         """
 
@@ -1677,8 +1697,8 @@ def forward(
             loss, pred_relations = self.extractor(text_output, entities, relations)
 
         if not return_dict:
-            output = (entities, relations) + outputs[2:]
-            return ((loss, pred_relations) + output) if loss is not None else output
+            output = (loss, pred_relations, entities, relations) + outputs[2:]
+            return output
 
         return RelationExtractionOutput(
             loss=loss,
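To make the new docstrings concrete before moving on to the test changes: at training time the caller supplies gold `entities` and `relations` in the documented format, and the model returns a loss plus `pred_relations`. The snippet below is a hypothetical continuation of the doctest above (it reuses its `model` and `encoding`); the label values and entity indices are invented for illustration.

```python
# training-time inputs in the documented format (values are made up):
# entity spans are token indices plus a label; relations reference entities
# by position through `head` and `tail`
encoding["entities"] = [{"start": [0, 4], "end": [3, 6], "label": [2, 1]}]
encoding["relations"] = [{"start_index": [0], "end_index": [6], "head": [0], "tail": [1]}]

outputs = model(**encoding)
print(outputs.loss)  # relation-classification loss

# each predicted relation is a plain dict with the seven documented keys
for relation in outputs.pred_relations[0]:
    print(relation["head_id"], relation["tail_id"], relation["type"])
```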
diff --git a/tests/models/layoutlmv2/test.py b/tests/models/layoutlmv2/test.py
deleted file mode 100644
index 70ca480b9a633b..00000000000000
--- a/tests/models/layoutlmv2/test.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# for example in range(self.batch_size):
-# # sample a number of entities for the example
-# num_entities = random.randint(1, self.max_entities)
-# entity_starts = []
-# entity_ends = []
-# entity_labels = []
-# for entity in range(num_entities):
-# entity_start = random.randint(0, self.seq_length)
-# entity_end = entity_start + random.randint(1, self.max_entity_length)
-# entity_label = random.randint(0, self.num_labels)
-# entity_starts.append(entity_start)
-# entity_ends.append(entity_end)
-# entity_labels.append(entity_label)
-# entity_dict = {
-# "start": entity_starts,
-# "end": entity_ends,
-# "label": entity_labels,
-# }
-# entities.append(entity_dict)
-
-# # sample a number of relations for the example
-# num_relations = random.randint(1, self.max_relations)
-# start_indices = []
-# end_indices = []
-# heads = []
-# tails = []
-# for relation in range(num_relations):
-# start_index = random.randint(0, self.seq_length)
-# end_index = start_index + random.randint(1, self.max_entity_length)
-# head = random.randint(0, self.max_entities)
-# tail = random.randint(0, self.max_entities)
-# relation_dict = {
-# "start_index": start_indices,
-# "end_index": end_indices,
-# "head": heads,
-# "tail": tails,
-# }
-# relations.append(relation_dict)
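The override added in the next diff exists because `pred_relations` is a list of plain Python dicts whose values include ints, which `torch.allclose` cannot compare. A toy illustration of the dispatch that `recursive_check` performs in the test that follows (the `compare` helper here is hypothetical, not part of the patch):

```python
import torch


def compare(a, b):
    # dicts are compared value-wise, plain ints by equality, and tensors via
    # torch.allclose -- mirroring recursive_check in the test below
    if isinstance(a, dict):
        return all(compare(x, y) for x, y in zip(a.values(), b.values()))
    if isinstance(a, int):
        return a == b
    return torch.allclose(a, b, atol=1e-5)


assert compare({"head_id": 0, "type": 1}, {"head_id": 0, "type": 1})
assert compare(torch.ones(2), torch.ones(2))
```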
diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
index 294d4f7ce971dc..05e452e24e4074 100644
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -20,6 +20,7 @@
 import random
 import tempfile
 import unittest
+from typing import Dict, List, Tuple
 
 from transformers.models.auto import get_values
 from transformers.testing_utils import require_detectron2, require_torch, require_torch_multi_gpu, slow, torch_device
@@ -718,6 +719,83 @@ def _create_and_check_torchscript(self, config, inputs_dict):
         # (Even with this call, there are still memory leak by ~0.04MB)
         self.clear_torch_jit_class_registry()
 
+    # overwrite as LayoutLMv2ForRelationExtraction outputs dictionaries containing integers rather than tensors
+    def test_model_outputs_equivalence(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        def set_nan_tensor_to_zero(t):
+            t[t != t] = 0
+            return t
+
+        def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}):
+            with torch.no_grad():
+                tuple_output = model(**tuple_inputs, return_dict=False, **additional_kwargs)
+                dict_output = model(**dict_inputs, return_dict=True, **additional_kwargs).to_tuple()
+
+                def recursive_check(tuple_object, dict_object):
+                    if isinstance(tuple_object, (List, Tuple)):
+                        for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object):
+                            recursive_check(tuple_iterable_value, dict_iterable_value)
+                    elif isinstance(tuple_object, Dict):
+                        for tuple_iterable_value, dict_iterable_value in zip(
+                            tuple_object.values(), dict_object.values()
+                        ):
+                            recursive_check(tuple_iterable_value, dict_iterable_value)
+                    elif tuple_object is None:
+                        return
+                    elif isinstance(tuple_object, int):
+                        self.assertEqual(tuple_object, dict_object)
+                    else:
+                        self.assertTrue(
+                            torch.allclose(
+                                set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5
+                            ),
+                            msg=(
+                                "Tuple and dict output are not equal. Difference:"
+                                f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:"
+                                f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has"
+                                f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}."
+                            ),
+                        )
+
+                recursive_check(tuple_output, dict_output)
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            model.to(torch_device)
+            model.eval()
+
+            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
+            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
+            check_equivalence(model, tuple_inputs, dict_inputs)
+
+            tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+            dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+            check_equivalence(model, tuple_inputs, dict_inputs)
+
+            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
+            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
+            check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})
+
+            tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+            dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+            check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})
+
+            if self.has_attentions:
+                tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
+                dict_inputs = self._prepare_for_class(inputs_dict, model_class)
+                check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})
+
+                tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+                dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+                check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})
+
+                tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+                dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+                check_equivalence(
+                    model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True}
+                )
+
 
 def prepare_layoutlmv2_batch_inputs():
     # Here we prepare a batch of 2 sequences to test a LayoutLMv2 forward pass on: