update to transformers v4.2.1
jeswan committed Jan 14, 2021
1 parent 6e6c2e3 commit e24c84a
Showing 6 changed files with 28 additions and 14 deletions.
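Two transformers v4 changes drive nearly every hunk below: per-model code moved from top-level modules such as transformers.modeling_bert into transformers.models.<model_name>.*, and model forward() calls now return ModelOutput objects unless return_dict=False is passed (see the sketch after the taskmodels.py hunks). A minimal sketch of the path change, using ACT2FN as in heads.py (illustrative, not part of the commit):

# Illustrative only: the v3 -> v4 module path this commit switches to.
import torch
import transformers

# v3.x location: transformers.modeling_bert.ACT2FN["gelu"]
gelu = transformers.models.bert.modeling_bert.ACT2FN["gelu"]  # v4.x location
print(gelu(torch.tensor([-1.0, 0.0, 1.0])))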
14 changes: 9 additions & 5 deletions jiant/proj/main/modeling/heads.py
@@ -108,8 +108,10 @@ class BertMLMHead(BaseMLMHead):
     def __init__(self, hidden_size, vocab_size, layer_norm_eps=1e-12, hidden_act="gelu"):
         super().__init__()
         self.dense = nn.Linear(hidden_size, hidden_size)
-        self.transform_act_fn = transformers.modeling_bert.ACT2FN[hidden_act]
-        self.LayerNorm = transformers.modeling_bert.BertLayerNorm(hidden_size, eps=layer_norm_eps)
+        self.transform_act_fn = transformers.models.bert.modeling_bert.ACT2FN[hidden_act]
+        self.LayerNorm = transformers.models.bert.modeling_bert.BertLayerNorm(
+            hidden_size, eps=layer_norm_eps
+        )
 
         self.decoder = nn.Linear(hidden_size, vocab_size, bias=False)
         self.bias = nn.Parameter(torch.zeros(vocab_size), requires_grad=True)
@@ -132,7 +134,9 @@ class RobertaMLMHead(BaseMLMHead):
     def __init__(self, hidden_size, vocab_size, layer_norm_eps=1e-12):
         super().__init__()
         self.dense = nn.Linear(hidden_size, hidden_size)
-        self.layer_norm = transformers.modeling_bert.BertLayerNorm(hidden_size, eps=layer_norm_eps)
+        self.layer_norm = transformers.models.bert.modeling_bert.BertLayerNorm(
+            hidden_size, eps=layer_norm_eps
+        )
 
         self.decoder = nn.Linear(hidden_size, vocab_size, bias=False)
         self.bias = nn.Parameter(torch.zeros(vocab_size), requires_grad=True)
@@ -143,7 +147,7 @@ def __init__(self, hidden_size, vocab_size, layer_norm_eps=1e-12):
 
     def forward(self, unpooled):
         x = self.dense(unpooled)
-        x = transformers.modeling_bert.gelu(x)
+        x = transformers.models.bert.modeling_bert.gelu(x)
         x = self.layer_norm(x)
 
         # project back to size of vocabulary with bias
@@ -161,7 +165,7 @@ def __init__(self, hidden_size, embedding_size, vocab_size, hidden_act="gelu"):
         self.bias = nn.Parameter(torch.zeros(vocab_size), requires_grad=True)
         self.dense = nn.Linear(hidden_size, embedding_size)
         self.decoder = nn.Linear(embedding_size, vocab_size)
-        self.activation = transformers.modeling_bert.ACT2FN[hidden_act]
+        self.activation = transformers.models.bert.modeling_bert.ACT2FN[hidden_act]
 
         # Need a link between the two variables so that the bias is correctly resized with
         # `resize_token_embeddings`
16 changes: 13 additions & 3 deletions jiant/proj/main/modeling/taskmodels.py
@@ -334,7 +334,12 @@ def get_output_from_encoder(encoder, input_ids, segment_ids, input_mask) -> Enco
 
 
 def get_output_from_standard_transformer_models(encoder, input_ids, segment_ids, input_mask):
-    output = encoder(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask)
+    output = encoder(
+        input_ids=input_ids,
+        token_type_ids=segment_ids,
+        attention_mask=input_mask,
+        return_dict=False,
+    )
     pooled, unpooled, other = output[1], output[0], output[2:]
     return pooled, unpooled, other
 
@@ -347,7 +352,7 @@ def get_output_from_bart_models(encoder, input_ids, input_mask):
     # sentence representation is the final decoder state.
     # That's what we use for `unpooled` here.
     dec_last, dec_all, enc_last, enc_all = encoder(
-        input_ids=input_ids, attention_mask=input_mask, output_hidden_states=True,
+        input_ids=input_ids, attention_mask=input_mask, output_hidden_states=True, return_dict=False
     )
     unpooled = dec_last
 
@@ -361,7 +366,12 @@ def get_output_from_electra(encoder, input_ids, segment_ids, input_mask):
 
 
 def get_output_from_electra(encoder, input_ids, segment_ids, input_mask):
-    output = encoder(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask)
+    output = encoder(
+        input_ids=input_ids,
+        token_type_ids=segment_ids,
+        attention_mask=input_mask,
+        return_dict=False,
+    )
     unpooled = output[0]
     pooled = unpooled[:, 0, :]
     return pooled, unpooled, output
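The return_dict=False arguments added above are needed because transformers v4 models return ModelOutput objects by default, while these helpers unpack positional tuples (output[1], output[0], output[2:], and the four-way unpacking for BART). A minimal sketch of the difference, assuming a plain BertModel encoder and toy inputs (both illustrative, not part of the commit):

# Illustrative sketch: why the helpers above pass return_dict=False.
import torch
from transformers import BertModel

encoder = BertModel.from_pretrained("bert-base-uncased")  # placeholder checkpoint
input_ids = torch.tensor([[101, 7592, 2088, 102]])  # toy input ids

with torch.no_grad():
    as_output = encoder(input_ids=input_ids)  # v4 default: a ModelOutput subclass
    as_tuple = encoder(input_ids=input_ids, return_dict=False)  # v3-style tuple

print(type(as_output).__name__)
pooled, unpooled = as_tuple[1], as_tuple[0]  # pooler_output, last_hidden_state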
2 changes: 1 addition & 1 deletion jiant/tasks/lib/templates/squad_style/core.py
@@ -5,7 +5,7 @@
 from dataclasses import dataclass
 from typing import Union, List, Dict, Optional
 
-from transformers.tokenization_bert import whitespace_tokenize
+from transformers.models.bert.tokenization_bert import whitespace_tokenize
 
 from jiant.tasks.lib.templates.squad_style import utils as squad_utils
 from jiant.shared.constants import PHASE
2 changes: 1 addition & 1 deletion jiant/tasks/lib/templates/squad_style/utils.py
@@ -6,7 +6,7 @@
 from dataclasses import dataclass
 from typing import List, Dict
 
-from transformers.tokenization_bert import BasicTokenizer
+from transformers.models.bert.tokenization_bert import BasicTokenizer
 from jiant.utils.display import maybe_tqdm
 
 
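Both squad_style changes are pure import moves: the BERT tokenization helpers used by the SQuAD-style templates now live under transformers.models.bert. A small usage sketch (illustrative, not part of the commit):

# Illustrative only: the relocated helpers imported above.
from transformers.models.bert.tokenization_bert import BasicTokenizer, whitespace_tokenize

print(whitespace_tokenize("What is  extractive QA?"))  # splits on whitespace
print(BasicTokenizer(do_lower_case=True).tokenize("What is extractive QA?"))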
4 changes: 2 additions & 2 deletions requirements-no-torch.txt
@@ -13,6 +13,6 @@ seqeval==0.0.12
 scikit-learn==0.22.2.post1
 scipy==1.4.1
 sentencepiece==0.1.86
-tokenizers==0.8.1.rc2
+tokenizers==0.9.4
 tqdm==4.46.0
-transformers==3.1.0
+transformers==4.2.1
4 changes: 2 additions & 2 deletions setup.py
@@ -72,10 +72,10 @@
         "scikit-learn == 0.22.2.post1",
         "scipy == 1.4.1",
         "sentencepiece == 0.1.86",
-        "tokenizers == 0.8.1.rc2",
+        "tokenizers == 0.9.4",
         "torch >= 1.5.0",
         "tqdm == 4.46.0",
-        "transformers == 3.1.0",
+        "transformers == 4.2.1",
         "torchvision == 0.6.0",
     ],
     extras_require=extras,
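requirements-no-torch.txt and setup.py bump the same two pins together, presumably because the tokenizers release has to match what transformers 4.2.1 expects. A quick post-install sanity check (illustrative, not part of the commit):

# Illustrative check that an environment matches the new pins.
import tokenizers
import transformers

assert transformers.__version__ == "4.2.1", transformers.__version__
assert tokenizers.__version__ == "0.9.4", tokenizers.__version__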
