Typo fixes #11432

Merged 1 commit on Apr 26, 2021
4 changes: 2 additions & 2 deletions src/transformers/commands/add_new_model.py
@@ -57,14 +57,14 @@ def run(self):
if not _has_cookiecutter:
raise ImportError(
"Model creation dependencies are required to use the `add_new_model` command. Install them by running "
"the folowing at the root of your `transformers` clone:\n\n\t$ pip install -e .[modelcreation]\n"
"the following at the root of your `transformers` clone:\n\n\t$ pip install -e .[modelcreation]\n"
)
# Ensure that there is no other `cookiecutter-template-xxx` directory in the current working directory
directories = [directory for directory in os.listdir() if "cookiecutter-template-" == directory[:22]]
if len(directories) > 0:
raise ValueError(
"Several directories starting with `cookiecutter-template-` in current working directory. "
"Please clean your directory by removing all folders startign with `cookiecutter-template-` or "
"Please clean your directory by removing all folders starting with `cookiecutter-template-` or "
"change your working directory."
)

2 changes: 1 addition & 1 deletion src/transformers/data/processors/squad.py
@@ -244,7 +244,7 @@ def squad_convert_example_to_features(
cls_index = span["input_ids"].index(tokenizer.cls_token_id)

# p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
# Original TF implem also keep the classification token (set to 0)
# Original TF implementation also keep the classification token (set to 0)
p_mask = np.ones_like(span["token_type_ids"])
if tokenizer.padding_side == "right":
p_mask[len(truncated_query) + sequence_added_tokens :] = 0
2 changes: 1 addition & 1 deletion src/transformers/feature_extraction_sequence_utils.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Sequence feature extraction class for common feature extrcactors to preprocess sequences.
Sequence feature extraction class for common feature extractors to preprocess sequences.
"""
from typing import Dict, List, Optional, Union

2 changes: 1 addition & 1 deletion src/transformers/file_utils.py
@@ -551,7 +551,7 @@ def wrapper(*args, **kwargs):
("sklearn", (is_sklearn_available, SKLEARN_IMPORT_ERROR)),
("speech", (is_speech_available, SPEECH_IMPORT_ERROR)),
("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
("tokenziers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
("tokenizers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)),
("vision", (is_vision_available, VISION_IMPORT_ERROR)),
]
2 changes: 1 addition & 1 deletion src/transformers/generation_logits_process.py
@@ -446,7 +446,7 @@ def _set_scores_to_inf_for_banned_tokens(self, scores: torch.Tensor, banned_toke

class PrefixConstrainedLogitsProcessor(LogitsProcessor):
r"""
:class:`transformers.LogitsProcessor` that enforces contrained generation and is useful for prefix-conditioned
:class:`transformers.LogitsProcessor` that enforces constrained generation and is useful for prefix-conditioned
constrained generation. See `Autoregressive Entity Retrieval <https://arxiv.org/abs/2010.00904>`__ for more
information.

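Aside (not part of the PR): PrefixConstrainedLogitsProcessor is what backs the prefix_allowed_tokens_fn argument of generate(). Below is a minimal sketch of driving constrained generation from user code, assuming a gpt2 checkpoint is available; the forced-continuation rule and helper names are illustrative only.

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "The capital of France is"
inputs = tokenizer(prompt, return_tensors="pt")
prompt_len = inputs.input_ids.shape[-1]

# Force the continuation " Paris" token by token, then fall back to the full vocabulary.
forced_ids = tokenizer(" Paris", add_special_tokens=False).input_ids

def allowed_tokens(batch_id, input_ids):
    step = input_ids.shape[-1] - prompt_len
    if step < len(forced_ids):
        return [forced_ids[step]]
    return list(range(len(tokenizer)))

outputs = model.generate(
    inputs.input_ids,
    max_length=prompt_len + 5,
    num_beams=2,
    prefix_allowed_tokens_fn=allowed_tokens,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))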
2 changes: 1 addition & 1 deletion src/transformers/generation_stopping_criteria.py
@@ -23,7 +23,7 @@
Prediction scores of a language modeling head. These can be scores for each vocabulary token before SoftMax
or scores for each vocabulary token after SoftMax.
kwargs:
Additional stopping critera specific kwargs.
Additional stopping criteria specific kwargs.

Return:
:obj:`bool`. :obj:`False` indicates we should continue, :obj:`True` indicates we should stop.
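Aside (not part of the PR): a sketch of a custom stopping criterion built on the interface documented above, where returning True means stop. It assumes StoppingCriteria and StoppingCriteriaList are exposed at the top level of transformers; the class name and token id are made up for illustration.

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokenCriteria(StoppingCriteria):
    """Stop once every sequence in the batch ends with `stop_token_id`."""

    def __init__(self, stop_token_id: int):
        self.stop_token_id = stop_token_id

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # False -> keep generating, True -> stop, matching the docstring above.
        return bool((input_ids[:, -1] == self.stop_token_id).all())

criteria = StoppingCriteriaList([StopOnTokenCriteria(stop_token_id=50256)])
# The list can then be handed to generation, e.g. model.generate(..., stopping_criteria=criteria).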
4 changes: 2 additions & 2 deletions src/transformers/generation_tf_utils.py
@@ -442,8 +442,8 @@ def _generate_no_beam_search(
**kwargs
):
"""
Generate sequences for each example without beam search (num_beams == 1). All returned sequence are generated
independantly.
Generate sequences for each example without beam search (num_beams == 1). All returned sequences are generated
independently.
"""

# length of generated sentences / unfinished sentences
2 changes: 1 addition & 1 deletion src/transformers/generation_utils.py
@@ -821,7 +821,7 @@ def generate(
... "at least two people were killed in a suspected bomb attack on a passenger bus "
... "in the strife-torn southern philippines on monday , the military said."
... )
>>> # encode input contex
>>> # encode input context
>>> input_ids = tokenizer(document, return_tensors="pt").input_ids
>>> # generate 3 independent sequences using beam search decoding (5 beams)
>>> # with T5 encoder-decoder model conditioned on short news article.
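Aside (not part of the PR): roughly how the docstring example above continues, sketched here with a t5-base checkpoint assumed as the encoder-decoder model; the actual docstring may use a different checkpoint and decoding arguments.

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

document = (
    "at least two people were killed in a suspected bomb attack on a passenger bus "
    "in the strife-torn southern philippines on monday , the military said."
)
# encode input context
input_ids = tokenizer(document, return_tensors="pt").input_ids
# generate 3 independent sequences using beam search decoding (5 beams)
outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3, max_length=40)
for sequence in outputs:
    print(tokenizer.decode(sequence, skip_special_tokens=True))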
2 changes: 1 addition & 1 deletion src/transformers/modeling_flax_utils.py
@@ -94,7 +94,7 @@ def __init__(
self.key = PRNGKey(seed)
self.dtype = dtype

# randomely initialized parameters
# randomly initialized parameters
random_params = self.init_weights(self.key, input_shape)

# save required_params as set
4 changes: 2 additions & 2 deletions src/transformers/modeling_outputs.py
@@ -343,7 +343,7 @@ class CausalLMOutputWithPast(ModelOutput):
Language modeling loss (for next-token prediction).
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
past_key_values (:obj:`tuple(tupel(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)`)

@@ -423,7 +423,7 @@ class SequenceClassifierOutputWithPast(ModelOutput):
Classification (or regression if config.num_labels==1) loss.
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
Classification (or regression if config.num_labels==1) scores (before SoftMax).
past_key_values (:obj:`tuple(tupel(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)`)

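Aside (not part of the PR): a short sketch of the past_key_values layout documented above, assuming a gpt2 checkpoint; the printed shape follows the (batch_size, num_heads, sequence_length, embed_size_per_head) convention.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello world", return_tensors="pt")
with torch.no_grad():
    out = model(**inputs, use_cache=True)

# One entry per layer, each holding the cached key and value tensors.
print(len(out.past_key_values))  # config.n_layer, 12 for gpt2
key, value = out.past_key_values[0]
print(key.shape)  # (batch_size, num_heads, sequence_length, embed_size_per_head)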
2 changes: 1 addition & 1 deletion src/transformers/modeling_tf_pytorch_utils.py
@@ -51,7 +51,7 @@ def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_remove="")
) # '_._' is replaced by a level separation (can be used to convert TF2.0 lists in PyTorch nn.ModulesList)
tf_name = re.sub(r"//+", "/", tf_name) # Remove empty levels at the end
tf_name = tf_name.split("/") # Convert from TF2.0 '/' separators to PyTorch '.' separators
# Some weights have a single name withtout "/" such as final_logits_bias in BART
# Some weights have a single name without "/" such as final_logits_bias in BART
if len(tf_name) > 1:
tf_name = tf_name[1:] # Remove level zero

4 changes: 2 additions & 2 deletions src/transformers/modeling_tf_utils.py
@@ -659,7 +659,7 @@ def serving(self, inputs):

Args:
inputs (:obj:`Dict[str, tf.Tensor]`):
The input of the saved model as a dictionnary of tensors.
The input of the saved model as a dictionary of tensors.
"""
output = self.call(inputs)

@@ -944,7 +944,7 @@ def _get_resized_lm_head_decoder(self, old_lm_head_decoder, new_num_tokens):
vectors from the end. If not provided or :obj:`None`, just returns None

Return:
:obj:`tf.Variable`: Pointer to the resized decoder or None if the output embeddings are differents of the
:obj:`tf.Variable`: Pointer to the resized decoder or None if the output embeddings are different from the
input ones.
"""
new_lm_head_decoder = old_lm_head_decoder
2 changes: 1 addition & 1 deletion src/transformers/modeling_utils.py
@@ -291,7 +291,7 @@ def get_head_mask(
The mask indicating if we should keep the heads or not (1.0 for keep, 0.0 for discard).
num_hidden_layers (:obj:`int`):
The number of hidden layers in the model.
is_attention_chunked: (:obj:`bool`, `optional, defaults to :obj:`False`):
is_attention_chunked: (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether or not the attentions scores are computed by chunks or not.

Returns:
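Aside (not part of the PR): an illustrative sketch of the head-mask convention documented above (1.0 keeps a head, 0.0 discards it), assuming a bert-base-uncased checkpoint.

import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Shape (num_hidden_layers, num_attention_heads); zero out the first head of every layer.
head_mask = torch.ones(model.config.num_hidden_layers, model.config.num_attention_heads)
head_mask[:, 0] = 0.0

inputs = tokenizer("Masking attention heads", return_tensors="pt")
outputs = model(**inputs, head_mask=head_mask)
print(outputs.last_hidden_state.shape)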
2 changes: 1 addition & 1 deletion src/transformers/models/auto/modeling_auto.py
@@ -716,7 +716,7 @@
"AutoModelForPreTraining", MODEL_FOR_PRETRAINING_MAPPING, head_doc="pretraining"
)

# Private on puprose, the public class will add the deprecation warnings.
# Private on purpose, the public class will add the deprecation warnings.
_AutoModelWithLMHead = auto_class_factory(
"AutoModelWithLMHead", MODEL_WITH_LM_HEAD_MAPPING, head_doc="language modeling"
)
2 changes: 1 addition & 1 deletion src/transformers/models/auto/modeling_flax_auto.py
@@ -103,7 +103,7 @@
)

FlaxAutoModelForSequenceClassification = auto_class_factory(
"AFlaxutoModelForSequenceClassification",
"FlaxAutoModelForSequenceClassification",
FLAX_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
head_doc="sequence classification",
)
2 changes: 1 addition & 1 deletion src/transformers/models/auto/modeling_tf_auto.py
@@ -469,7 +469,7 @@
"TFAutoModelForPreTraining", TF_MODEL_FOR_PRETRAINING_MAPPING, head_doc="pretraining"
)

# Private on puprose, the public class will add the deprecation warnings.
# Private on purpose, the public class will add the deprecation warnings.
_TFAutoModelWithLMHead = auto_class_factory(
"TFAutoModelWithLMHead", TF_MODEL_WITH_LM_HEAD_MAPPING, head_doc="language modeling"
)
2 changes: 1 addition & 1 deletion src/transformers/models/bart/configuration_bart.py
@@ -171,7 +171,7 @@ def __init__(
self.gradient_checkpointing = gradient_checkpointing
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True

# ensure backward compatibilty for BART CNN models
# ensure backward compatibility for BART CNN models
if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
self.forced_bos_token_id = self.bos_token_id
warnings.warn(
6 changes: 3 additions & 3 deletions src/transformers/models/bart/modeling_bart.py
@@ -111,7 +111,7 @@ class BartLearnedPositionalEmbedding(nn.Embedding):

def __init__(self, num_embeddings: int, embedding_dim: int):
# Bart is set up so that if padding_idx is specified then offset the embedding ids by 2
# and adjust num_embeddings appropriately. Other models dont have this hack
# and adjust num_embeddings appropriately. Other models don't have this hack
self.offset = 2
super().__init__(num_embeddings + self.offset, embedding_dim)

@@ -236,9 +236,9 @@ def forward(
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

if output_attentions:
# this operation is a bit akward, but it's required to
# this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
# In order to do so, attn_weights have to reshaped
# In order to do so, attn_weights have to be reshaped
# twice and have to be reused in the following
attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
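Aside (not part of the PR): a simplified sketch of the offset-by-2 trick mentioned in the BartLearnedPositionalEmbedding comment above; this is stripped down from the real module, so treat the class as illustrative rather than the actual implementation.

import torch
import torch.nn as nn

class LearnedPositionalEmbedding(nn.Embedding):
    """Learned positional embeddings whose table is widened by a fixed offset, as in BART."""

    def __init__(self, num_embeddings: int, embedding_dim: int):
        # Widen the table by the offset and shift every position id before lookup.
        self.offset = 2
        super().__init__(num_embeddings + self.offset, embedding_dim)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        positions = torch.arange(input_ids.shape[1], dtype=torch.long, device=input_ids.device)
        return super().forward(positions + self.offset)

emb = LearnedPositionalEmbedding(num_embeddings=1024, embedding_dim=16)
print(emb(torch.zeros(1, 8, dtype=torch.long)).shape)  # torch.Size([8, 16])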
10 changes: 5 additions & 5 deletions src/transformers/models/bart/modeling_tf_bart.py
@@ -116,7 +116,7 @@ class TFBartLearnedPositionalEmbedding(TFSharedEmbeddings):

def __init__(self, num_embeddings: int, embedding_dim: int, **kwargs):
# Bart is set up so that if padding_idx is specified then offset the embedding ids by 2
# and adjust num_embeddings appropriately. Other models dont have this hack
# and adjust num_embeddings appropriately. Other models don't have this hack
self.offset = 2
super().__init__(num_embeddings + self.offset, embedding_dim, **kwargs)

@@ -572,7 +572,7 @@ def serving(self, inputs):
Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in ``[0, 1]``:

- 1 indicates the head is **not masked**,
- 0 indicates the heas is **masked**.
- 0 indicates the head is **masked**.

decoder_head_mask (:obj:`tf.Tensor` of shape :obj:`(decoder_layers, decoder_attention_heads)`, `optional`):
Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in ``[0, 1]``:
@@ -677,7 +677,7 @@ def call(
Mask to nullify selected heads of the attention modules. Mask values selected in ``[0, 1]``:

- 1 indicates the head is **not masked**,
- 0 indicates the heas is **masked**.
- 0 indicates the head is **masked**.

inputs_embeds (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded
@@ -856,14 +856,14 @@ def call(
Mask to nullify selected heads of the attention modules. Mask values selected in ``[0, 1]``:

- 1 indicates the head is **not masked**,
- 0 indicates the heas is **masked**.
- 0 indicates the head is **masked**.

encoder_head_mask (:obj:`tf.Tensor` of shape :obj:`(encoder_layers, encoder_attention_heads)`, `optional`):
Mask to nullify selected heads of the attention modules in encoder to avoid performing cross-attention
on hidden heads. Mask values selected in ``[0, 1]``:

- 1 indicates the head is **not masked**,
- 0 indicates the heas is **masked**.
- 0 indicates the head is **masked**.

past_key_values (:obj:`Tuple[Tuple[tf.Tensor]]` of length :obj:`config.n_layers` with each tuple having 2 tuples each of which has 2 tensors of shape :obj:`(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
Contains precomputed key and value hidden-states of the attention blocks. Can be used to speed up
@@ -304,7 +304,7 @@ def tokenize(self, text, never_split=None, **kwargs):


class CharacterTokenizer:
"""Runs Character tokenziation."""
"""Runs Character tokenization."""

def __init__(self, vocab, unk_token, normalize_text=True):
"""
2 changes: 1 addition & 1 deletion src/transformers/models/bertweet/tokenization_bertweet.py
@@ -451,7 +451,7 @@ def add_from_file(self, f):
the class Tokenizer.

4. When instantiating Tokenizer objects, there is a single option: preserve_case. By default, it is set to True. If it
is set to False, then the tokenizer will downcase everything except for emoticons.
is set to False, then the tokenizer will lowercase everything except for emoticons.

"""

18 changes: 9 additions & 9 deletions src/transformers/models/big_bird/modeling_big_bird.py
@@ -725,7 +725,7 @@ def bigbird_block_sparse_attention(
band_product, dim=-1
) # [bsz, n_heads, from_seq_len//from_block_size-4, from_block_size, (5+n_rand_blocks)*to_block_size]

# contibution of sliding keys
# contribution of sliding keys
# [bsz, n_heads, m//from_block_size-4, from_block_size, 3*to_block_size] x [bsz, n_heads, from_seq_len//from_block_size-4, 3*to_block_size, -1]
context_layer = self.torch_bmm_nd(
attn_weights[:, :, :, :, to_block_size : 4 * to_block_size], exp_blocked_value_matrix, ndim=5
@@ -877,7 +877,7 @@ def bigbird_block_sparse_attention(
attn_probs_view[:, :, q_idx, :, q_idx : q_idx + 3, :] = right_slice.view(
bsz, n_heads, from_block_size, 3, to_block_size
) # inner_band_product
# global keys (correspomding to 1st key block)
# global keys (corresponding to 1st key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
:, :, :, :, :to_block_size
].view(
@@ -947,7 +947,7 @@ def bigbird_block_sparse_attention(

@staticmethod
def torch_gather_b2(params, indices):
# this operation is equilvalent to tf.gather when batch_dims=2
# this operation is equivalent to tf.gather when batch_dims=2

if params.shape[:2] != indices.shape[:2]:
raise ValueError(
@@ -1055,7 +1055,7 @@ def _bigbird_block_rand_mask(
to_block_size: int. size of block in to sequence.
num_rand_blocks: int. Number of random chunks per row.
last_idx: if -1 then num_rand_blocks blocks chosen anywhere in to sequence,
if positive then num_rand_blocks blocks choosen only upto last_idx.
if positive then num_rand_blocks blocks chosen only up to last_idx.

Returns:
adjacency list of size from_seq_length//from_block_size-2 by num_rand_blocks
@@ -1150,7 +1150,7 @@ def _bigbird_block_rand_mask_with_head(
plan_block_length = np.array(plan_from_length) // from_block_size
# till when to follow plan
max_plan_idx = plan_from_length.index(from_seq_length)
# Random Attention adjajency list
# Random Attention adjacency list
rand_attn = [
np.zeros((num_blocks, np.sum(plan_num_rand_blocks[: max_plan_idx + 1])), dtype=np.int32)
for i in range(num_heads)
@@ -1247,8 +1247,8 @@ def _get_single_block_row_attention(

Args:
block_id: int. block id of row.
to_start_block_id: int. random attention coloum start id.
to_end_block_id: int. random attention coloum end id.
to_start_block_id: int. random attention column start id.
to_end_block_id: int. random attention column end id.
num_rand_blocks: int. number of random blocks to be selected.
window_block_left: int. number of blocks of window to left of a block.
window_block_right: int. number of blocks of window to right of a block.
@@ -1826,7 +1826,7 @@ def _init_weights(self, module):
@dataclass
class BigBirdForPreTrainingOutput(ModelOutput):
"""
Output type of :class:`~transformers.BigBirdtForPreTraining`.
Output type of :class:`~transformers.BigBirdForPreTraining`.

Args:
loss (`optional`, returned when ``labels`` is provided, ``torch.FloatTensor`` of shape :obj:`(1,)`):
@@ -2907,7 +2907,7 @@ def forward(

logits_mask = None
if question_lengths is not None:
# setting lengths logits to `-infi`
# setting lengths logits to `-inf`
logits_mask = self.prepare_question_mask(question_lengths, seqlen)
if token_type_ids is None:
token_type_ids = (~logits_mask).long()
4 changes: 2 additions & 2 deletions src/transformers/models/blenderbot/modeling_blenderbot.py
@@ -237,9 +237,9 @@ def forward(
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

if output_attentions:
# this operation is a bit akward, but it's required to
# this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
# In order to do so, attn_weights have to reshaped
# In order to do so, attn_weights have to be reshaped
# twice and have to be reused in the following
attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)