updated code for quality test
Jesujoba Alabi committed Apr 20, 2023
1 parent 5253fb9 commit 0f848ff
Showing 31 changed files with 242 additions and 199 deletions.
5 changes: 3 additions & 2 deletions src/transformers/adapters/utils.py
@@ -722,8 +722,9 @@ def resolve_adapter_path(
except Exception as ex:
logger.info(ex)
raise EnvironmentError(
"Unable to load adapter {} from any source. Please check the name of the adapter or the source."
.format(adapter_name_or_path)
"Unable to load adapter {} from any source. Please check the name of the adapter or the source.".format(
adapter_name_or_path
)
)
else:
raise ValueError("Unable to identify {} as a valid module location.".format(adapter_name_or_path))
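
The rewrapped raise above only moves where the .format(...) call breaks across lines; the error message it builds is unchanged. A minimal sketch, with a hypothetical adapter name standing in for adapter_name_or_path:

# Minimal sketch (not from the diff): both layouts build the same message;
# "my-adapter" is a hypothetical stand-in for adapter_name_or_path.
adapter_name_or_path = "my-adapter"

msg_old = (
    "Unable to load adapter {} from any source. Please check the name of the adapter or the source."
    .format(adapter_name_or_path)
)
msg_new = "Unable to load adapter {} from any source. Please check the name of the adapter or the source.".format(
    adapter_name_or_path
)
assert msg_old == msg_new  # only the line wrapping differs
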
9 changes: 7 additions & 2 deletions src/transformers/commands/add_new_model_like.py
@@ -438,9 +438,14 @@ def duplicate_module(
# Special cases
if "PRETRAINED_CONFIG_ARCHIVE_MAP = {" in obj:
# docstyle-ignore
obj = f"{new_model_patterns.model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP = " + "{" + f"""
obj = (
f"{new_model_patterns.model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP = "
+ "{"
+ f"""
"{new_model_patterns.checkpoint}": "https://huggingface.co/{new_model_patterns.checkpoint}/resolve/main/config.json",
""" + "}\n"
"""
+ "}\n"
)
new_objects.append(obj)
continue
elif "PRETRAINED_MODEL_ARCHIVE_LIST = [" in obj:
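
Because the generated snippet mixes literal braces with f-string interpolation, the "{" and "}" pieces stay plain strings concatenated around the f-string body, and the new parenthesized layout builds the same text. A reduced sketch with hypothetical model names, not the real template values:

# Reduced sketch (hypothetical names): literal braces are concatenated as plain
# strings around the f-string body; both layouts produce the same string.
model_upper_cased = "NEWMODEL"
checkpoint = "org/new-model"

obj_old = f"{model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP = " + "{" + f"""
    "{checkpoint}": "https://huggingface.co/{checkpoint}/resolve/main/config.json",
""" + "}\n"

obj_new = (
    f"{model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP = "
    + "{"
    + f"""
    "{checkpoint}": "https://huggingface.co/{checkpoint}/resolve/main/config.json",
"""
    + "}\n"
)
assert obj_old == obj_new
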
6 changes: 4 additions & 2 deletions src/transformers/integrations.py
@@ -1046,11 +1046,13 @@ def __del__(self):

class NeptuneMissingConfiguration(Exception):
def __init__(self):
super().__init__("""
super().__init__(
"""
------ Unsupported ---- We were not able to create new runs. You provided a custom Neptune run to
`NeptuneCallback` with the `run` argument. For the integration to work fully, provide your `api_token` and
`project` by saving them as environment variables or passing them to the callback.
""")
"""
)


class NeptuneCallback(TrainerCallback):
6 changes: 4 additions & 2 deletions src/transformers/modeling_utils.py
@@ -2423,11 +2423,13 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
key: device_map[key] for key in device_map.keys() if key not in modules_to_not_convert
}
if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
raise ValueError("""
raise ValueError(
"""
Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
the quantized model. If you have set a value for `max_memory` you should increase that. To have
an idea of the modules that are set on the CPU or RAM you can print model.hf_device_map.
""")
"""
)
del device_map_without_lm_head

if from_tf:
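
The raised text is the same whether the triple-quoted literal sits on the call line or on its own lines inside the parentheses. A minimal sketch with the message abridged:

# Minimal sketch (not from the diff, message abridged): both layouts raise the
# identical string.
err_old = ValueError("""
Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
the quantized model.
""")
err_new = ValueError(
    """
Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
the quantized model.
"""
)
assert str(err_old) == str(err_new)
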
8 changes: 5 additions & 3 deletions src/transformers/models/big_bird/modeling_big_bird.py
@@ -927,9 +927,11 @@ def bigbird_block_sparse_attention(
attention_probs[:, :, -2 * from_block_size : -from_block_size, :to_block_size] = second_last_attn_weights[
:, :, :, :to_block_size
] # 1st key block (global)
attention_probs[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = (
second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size]
) # last three blocks (global + sliding)
attention_probs[
:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :
] = second_last_attn_weights[
:, :, :, to_block_size : 4 * to_block_size
] # last three blocks (global + sliding)
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, second_last_attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
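
Both wrappings of the sliced assignment write the same block of second_last_attn_weights into attention_probs; only the line breaks in the subscripts differ. A small self-contained sketch with toy shapes (the sizes below are illustrative, not the model's real dimensions):

# Toy illustration (not from the diff): the two wrappings of the sliced
# assignment are equivalent; only the line breaks differ.
import torch

bsz, heads, from_block_size, to_block_size = 2, 4, 3, 3
attention_probs = torch.zeros(bsz, heads, 4 * from_block_size, 5 * to_block_size)
second_last_attn_weights = torch.randn(bsz, heads, from_block_size, 5 * to_block_size)

a = attention_probs.clone()
b = attention_probs.clone()

# wrapping style 1
a[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = (
    second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size]
)
# wrapping style 2
b[
    :, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :
] = second_last_attn_weights[
    :, :, :, to_block_size : 4 * to_block_size
]
assert torch.equal(a, b)
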
(changed file: path not shown in this capture)
@@ -745,9 +745,11 @@ def bigbird_block_sparse_attention(
attention_probs[:, :, -2 * from_block_size : -from_block_size, :to_block_size] = second_last_attn_weights[
:, :, :, :to_block_size
] # 1st key block (global)
attention_probs[:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :] = (
second_last_attn_weights[:, :, :, to_block_size : 4 * to_block_size]
) # last three blocks (global + sliding)
attention_probs[
:, :, -2 * from_block_size : -from_block_size, -3 * to_block_size :
] = second_last_attn_weights[
:, :, :, to_block_size : 4 * to_block_size
] # last three blocks (global + sliding)
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, second_last_attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
132 changes: 66 additions & 66 deletions src/transformers/models/convbert/modeling_convbert.py
@@ -88,72 +88,72 @@ def load_tf_weights_in_convbert(model, config, tf_checkpoint_path):
group_dense_name = "dense"

for j in range(config.num_hidden_layers):
param_mapping[f"encoder.layer.{j}.attention.self.query.weight"] = (
f"electra/encoder/layer_{j}/attention/self/query/kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.query.bias"] = (
f"electra/encoder/layer_{j}/attention/self/query/bias"
)
param_mapping[f"encoder.layer.{j}.attention.self.key.weight"] = (
f"electra/encoder/layer_{j}/attention/self/key/kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.key.bias"] = (
f"electra/encoder/layer_{j}/attention/self/key/bias"
)
param_mapping[f"encoder.layer.{j}.attention.self.value.weight"] = (
f"electra/encoder/layer_{j}/attention/self/value/kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.value.bias"] = (
f"electra/encoder/layer_{j}/attention/self/value/bias"
)
param_mapping[f"encoder.layer.{j}.attention.self.key_conv_attn_layer.depthwise.weight"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_key/depthwise_kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.key_conv_attn_layer.pointwise.weight"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_key/pointwise_kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.key_conv_attn_layer.bias"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_key/bias"
)
param_mapping[f"encoder.layer.{j}.attention.self.conv_kernel_layer.weight"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.conv_kernel_layer.bias"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/bias"
)
param_mapping[f"encoder.layer.{j}.attention.self.conv_out_layer.weight"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_point/kernel"
)
param_mapping[f"encoder.layer.{j}.attention.self.conv_out_layer.bias"] = (
f"electra/encoder/layer_{j}/attention/self/conv_attn_point/bias"
)
param_mapping[f"encoder.layer.{j}.attention.output.dense.weight"] = (
f"electra/encoder/layer_{j}/attention/output/dense/kernel"
)
param_mapping[f"encoder.layer.{j}.attention.output.LayerNorm.weight"] = (
f"electra/encoder/layer_{j}/attention/output/LayerNorm/gamma"
)
param_mapping[f"encoder.layer.{j}.attention.output.dense.bias"] = (
f"electra/encoder/layer_{j}/attention/output/dense/bias"
)
param_mapping[f"encoder.layer.{j}.attention.output.LayerNorm.bias"] = (
f"electra/encoder/layer_{j}/attention/output/LayerNorm/beta"
)
param_mapping[f"encoder.layer.{j}.intermediate.dense.weight"] = (
f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/kernel"
)
param_mapping[f"encoder.layer.{j}.intermediate.dense.bias"] = (
f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/bias"
)
param_mapping[f"encoder.layer.{j}.output.dense.weight"] = (
f"electra/encoder/layer_{j}/output/{group_dense_name}/kernel"
)
param_mapping[f"encoder.layer.{j}.output.dense.bias"] = (
f"electra/encoder/layer_{j}/output/{group_dense_name}/bias"
)
param_mapping[f"encoder.layer.{j}.output.LayerNorm.weight"] = (
f"electra/encoder/layer_{j}/output/LayerNorm/gamma"
)
param_mapping[
f"encoder.layer.{j}.attention.self.query.weight"
] = f"electra/encoder/layer_{j}/attention/self/query/kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.query.bias"
] = f"electra/encoder/layer_{j}/attention/self/query/bias"
param_mapping[
f"encoder.layer.{j}.attention.self.key.weight"
] = f"electra/encoder/layer_{j}/attention/self/key/kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.key.bias"
] = f"electra/encoder/layer_{j}/attention/self/key/bias"
param_mapping[
f"encoder.layer.{j}.attention.self.value.weight"
] = f"electra/encoder/layer_{j}/attention/self/value/kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.value.bias"
] = f"electra/encoder/layer_{j}/attention/self/value/bias"
param_mapping[
f"encoder.layer.{j}.attention.self.key_conv_attn_layer.depthwise.weight"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_key/depthwise_kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.key_conv_attn_layer.pointwise.weight"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_key/pointwise_kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.key_conv_attn_layer.bias"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_key/bias"
param_mapping[
f"encoder.layer.{j}.attention.self.conv_kernel_layer.weight"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.conv_kernel_layer.bias"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_kernel/bias"
param_mapping[
f"encoder.layer.{j}.attention.self.conv_out_layer.weight"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_point/kernel"
param_mapping[
f"encoder.layer.{j}.attention.self.conv_out_layer.bias"
] = f"electra/encoder/layer_{j}/attention/self/conv_attn_point/bias"
param_mapping[
f"encoder.layer.{j}.attention.output.dense.weight"
] = f"electra/encoder/layer_{j}/attention/output/dense/kernel"
param_mapping[
f"encoder.layer.{j}.attention.output.LayerNorm.weight"
] = f"electra/encoder/layer_{j}/attention/output/LayerNorm/gamma"
param_mapping[
f"encoder.layer.{j}.attention.output.dense.bias"
] = f"electra/encoder/layer_{j}/attention/output/dense/bias"
param_mapping[
f"encoder.layer.{j}.attention.output.LayerNorm.bias"
] = f"electra/encoder/layer_{j}/attention/output/LayerNorm/beta"
param_mapping[
f"encoder.layer.{j}.intermediate.dense.weight"
] = f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/kernel"
param_mapping[
f"encoder.layer.{j}.intermediate.dense.bias"
] = f"electra/encoder/layer_{j}/intermediate/{group_dense_name}/bias"
param_mapping[
f"encoder.layer.{j}.output.dense.weight"
] = f"electra/encoder/layer_{j}/output/{group_dense_name}/kernel"
param_mapping[
f"encoder.layer.{j}.output.dense.bias"
] = f"electra/encoder/layer_{j}/output/{group_dense_name}/bias"
param_mapping[
f"encoder.layer.{j}.output.LayerNorm.weight"
] = f"electra/encoder/layer_{j}/output/LayerNorm/gamma"
param_mapping[f"encoder.layer.{j}.output.LayerNorm.bias"] = f"electra/encoder/layer_{j}/output/LayerNorm/beta"

for param in model.named_parameters():
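
Moving each param_mapping key onto its own line leaves the resulting dictionary unchanged; the hunk above applies the same rewrap to every layer parameter. A trimmed sketch with a single entry:

# Trimmed sketch (one entry only, not the full mapping): both layouts insert the
# same key/value pair.
j = 0

mapping_a = {}
mapping_a[f"encoder.layer.{j}.attention.self.query.weight"] = (
    f"electra/encoder/layer_{j}/attention/self/query/kernel"
)

mapping_b = {}
mapping_b[
    f"encoder.layer.{j}.attention.self.query.weight"
] = f"electra/encoder/layer_{j}/attention/self/query/kernel"

assert mapping_a == mapping_b
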
24 changes: 12 additions & 12 deletions src/transformers/models/donut/convert_donut_to_pytorch.py
@@ -106,22 +106,22 @@ def convert_state_dict(orig_state_dict, model):
orig_state_dict[
f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.weight"
] = val[:dim, :]
orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"] = (
val[dim : dim * 2, :]
)
orig_state_dict[
f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.weight"
] = val[dim : dim * 2, :]
orig_state_dict[
f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.weight"
] = val[-dim:, :]
else:
orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"] = (
val[:dim]
)
orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias"] = (
val[dim : dim * 2]
)
orig_state_dict[f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias"] = (
val[-dim:]
)
orig_state_dict[
f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.query.bias"
] = val[:dim]
orig_state_dict[
f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.key.bias"
] = val[dim : dim * 2]
orig_state_dict[
f"encoder.encoder.layers.{layer_num}.blocks.{block_num}.attention.self.value.bias"
] = val[-dim:]
elif "attn_mask" in key or key in ["encoder.model.norm.weight", "encoder.model.norm.bias"]:
# HuggingFace implementation doesn't use attn_mask buffer
# and model doesn't use final LayerNorms for the encoder
23 changes: 17 additions & 6 deletions src/transformers/models/flava/modeling_flava.py
@@ -775,11 +775,14 @@ def forward(self, hidden_states: torch.Tensor):

FLAVA_TEXT_INPUTS_DOCSTRING = FLAVA_TEXT_INPUTS_DOCSTRING_BASE + FLAVA_INPUTS_DOCSTRING_COMMON

FLAVA_MULTIMODAL_INPUTS_DOCSTRING = r"""
FLAVA_MULTIMODAL_INPUTS_DOCSTRING = (
r"""
Args:
hidden_states (`torch.FloatTensor` of shape `(batch_size, image_num_patches + text_seq_len, hidden_size)`):
The concatenated hidden states of unimodal encoders.
""" + FLAVA_INPUTS_DOCSTRING_COMMON
"""
+ FLAVA_INPUTS_DOCSTRING_COMMON
)

FLAVA_MODEL_INPUTS_DOCSTRING_BASE = r"""
Args:
@@ -1260,7 +1263,9 @@ def get_text_features(
... text=["a photo of a cat", "a photo of a dog"], max_length=77, padding="max_length", return_tensors="pt"
... )
>>> text_features = model.get_text_features(**inputs)
```""".format(_CHECKPOINT_FOR_DOC)
```""".format(
_CHECKPOINT_FOR_DOC
)
text_outputs = self.text_model(
input_ids=input_ids,
attention_mask=attention_mask,
@@ -1309,7 +1314,9 @@ def get_image_features(
>>> inputs = processor(images=image, return_tensors="pt")
>>> image_features = model.get_image_features(**inputs)
```""".format(_CHECKPOINT_FOR_DOC)
```""".format(
_CHECKPOINT_FOR_DOC
)
image_outputs = self.image_model(
pixel_values=pixel_values,
bool_masked_pos=bool_masked_pos,
@@ -1574,7 +1581,9 @@ def get_codebook_indices(self, pixel_values: torch.Tensor) -> torch.Tensor:
>>> outputs = model.get_codebook_indices(**inputs)
```
""".format(_CHECKPOINT_FOR_CODEBOOK_DOC)
""".format(
_CHECKPOINT_FOR_CODEBOOK_DOC
)
z_logits = self.blocks(pixel_values)
return torch.argmax(z_logits, axis=1)

@@ -1609,7 +1618,9 @@ def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
>>> print(outputs.shape)
(1, 196)
```
""".format(_CHECKPOINT_FOR_CODEBOOK_DOC)
""".format(
_CHECKPOINT_FOR_CODEBOOK_DOC
)
if len(pixel_values.shape) != 4:
raise ValueError(f"input shape {pixel_values.shape} is not 4d")
if pixel_values.shape[1] != self.input_channels:
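
In each of the FLAVA snippets, .format(_CHECKPOINT_FOR_DOC) substitutes the checkpoint name into the placeholders of the example block, and moving the argument onto its own line does not change the result. A minimal sketch (the checkpoint value and the "{0}" placeholder style are assumed for illustration):

# Minimal sketch (not from the diff): only the wrapping of the .format(...) call
# changes; the substituted text is identical. The checkpoint value is assumed.
_CHECKPOINT_FOR_DOC = "facebook/flava-full"

template = '>>> model = FlavaModel.from_pretrained("{0}")'
one_line = template.format(_CHECKPOINT_FOR_DOC)
wrapped = template.format(
    _CHECKPOINT_FOR_DOC
)
assert one_line == wrapped == '>>> model = FlavaModel.from_pretrained("facebook/flava-full")'
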
(changed file: path not shown in this capture)
@@ -1213,8 +1213,7 @@ def truncate_sequences(
)
if truncation_strategy == TruncationStrategy.ONLY_FIRST:
error_msg = (
error_msg
+ "Please select another truncation strategy than "
error_msg + "Please select another truncation strategy than "
f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
)
logger.error(error_msg)
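
In both the old and the new layout, the plain literal and the f-string on the following line are adjacent string literals that Python joins into one message; the only difference is whether "error_msg +" shares a line with the first literal. A minimal sketch with stand-in values:

# Minimal sketch (not from the diff): adjacent string literals (one plain, one
# f-string) are concatenated at compile time. Values below are stand-ins.
truncation_strategy = "only_first"
error_msg = "We need to remove 3 tokens to truncate the input but the first sequence has a length 2. "

error_msg = (
    error_msg + "Please select another truncation strategy than "
    f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
)
print(error_msg)
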
(changed file: path not shown in this capture)
@@ -1345,8 +1345,7 @@ def truncate_sequences(
)
if truncation_strategy == TruncationStrategy.ONLY_FIRST:
error_msg = (
error_msg
+ "Please select another truncation strategy than "
error_msg + "Please select another truncation strategy than "
f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
)
logger.error(error_msg)
(changed file: path not shown in this capture)
@@ -236,14 +236,24 @@ def write_model_card(self, model_dict, dry_run=False) -> str:
* OPUS readme: [README.md]({readme_url})
"""

content = f"""
content = (
f"""
* model: {model_dict['modeltype']}
* source language code{src_multilingual*'s'}: {', '.join(a2_src_tags)}
* target language code{tgt_multilingual*'s'}: {', '.join(a2_tgt_tags)}
* dataset: opus {backtranslated_data}
* release date: {model_dict['release-date']}
* pre-processing: {model_dict['pre-processing']}
""" + multilingual_data + tuned + download + langtoken + datainfo + testset + testscores + scorestable
"""
+ multilingual_data
+ tuned
+ download
+ langtoken
+ datainfo
+ testset
+ testscores
+ scorestable
)

content = FRONT_MATTER_TEMPLATE.format(lang_tags) + extra_markdown + content

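
The model-card body is an f-string block followed by a chain of pre-built sections; wrapping the concatenation in parentheses with one "+ part" per line yields the same string as the original single-line sum. A reduced sketch with stand-in values, not the real card fields:

# Reduced sketch (stand-in values): both layouts concatenate the same pieces.
model_type = "transformer-align"            # stand-in for model_dict['modeltype']
tuned = "* fine-tuned from: <base model>\n"  # stand-in section
download = "* download original weights: <url>\n"  # stand-in section

content_old = f"""
* model: {model_type}
""" + tuned + download

content_new = (
    f"""
* model: {model_type}
"""
    + tuned
    + download
)
assert content_old == content_new
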
3 changes: 1 addition & 2 deletions src/transformers/models/markuplm/tokenization_markuplm.py
@@ -1315,8 +1315,7 @@ def truncate_sequences(
)
if truncation_strategy == TruncationStrategy.ONLY_FIRST:
error_msg = (
error_msg
+ "Please select another truncation strategy than "
error_msg + "Please select another truncation strategy than "
f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
)
logger.error(error_msg)
(diffs for the remaining changed files were not loaded in this capture)