From 3c50138e5dbcf52e825a737b60452babaaf894bd Mon Sep 17 00:00:00 2001 From: fabiocapsouza Date: Sun, 15 Nov 2020 12:30:46 -0300 Subject: [PATCH] Revert "Doc fixes in preparation for the docstyle PR (#8061)" This reverts commit 04d6f09aeb1d9d0a7a7b588dac4ca84382092c8e. --- docs/source/main_classes/processors.rst | 4 +- docs/source/model_doc/bertgeneration.rst | 2 +- docs/source/model_doc/blenderbot.rst | 9 +- docs/source/model_doc/gpt.rst | 2 - src/transformers/benchmark/benchmark_utils.py | 4 +- src/transformers/commands/convert.py | 8 +- src/transformers/commands/serving.py | 8 +- src/transformers/commands/train.py | 8 +- src/transformers/commands/user.py | 7 +- ..._bert_pytorch_checkpoint_to_original_tf.py | 22 ++-- .../convert_marian_tatoeba_to_pytorch.py | 10 +- src/transformers/data/data_collator.py | 9 +- src/transformers/file_utils.py | 8 -- src/transformers/generation_tf_utils.py | 2 +- src/transformers/modeling_gpt2.py | 6 +- src/transformers/modeling_longformer.py | 12 +- src/transformers/modeling_openai.py | 6 +- src/transformers/modeling_roberta.py | 12 +- src/transformers/modeling_tf_gpt2.py | 2 +- src/transformers/modeling_tf_longformer.py | 14 +-- src/transformers/modeling_tf_openai.py | 2 +- src/transformers/modeling_tf_roberta.py | 14 +-- src/transformers/modeling_tf_xlnet.py | 117 ++++++++++++++++++ src/transformers/modeling_xlnet.py | 2 +- src/transformers/testing_utils.py | 72 +++++------ src/transformers/tokenization_bertweet.py | 39 +++--- src/transformers/tokenization_deberta.py | 15 +-- 27 files changed, 237 insertions(+), 179 deletions(-) diff --git a/docs/source/main_classes/processors.rst b/docs/source/main_classes/processors.rst index 6769d0717ff41c..9167a43ef36245 100644 --- a/docs/source/main_classes/processors.rst +++ b/docs/source/main_classes/processors.rst @@ -112,7 +112,7 @@ Example usage ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Here is an example using the processors as well as the conversion method using data files: -.. code-block:: +Example:: # Loading a V2 processor processor = SquadV2Processor() @@ -133,7 +133,7 @@ Here is an example using the processors as well as the conversion method using d Using `tensorflow_datasets` is as easy as using a data file: -.. code-block:: +Example:: # tensorflow_datasets only handle Squad V1. tfds_examples = tfds.load("squad") diff --git a/docs/source/model_doc/bertgeneration.rst b/docs/source/model_doc/bertgeneration.rst index f72e0924f44b1c..ee2591e1b6d39f 100644 --- a/docs/source/model_doc/bertgeneration.rst +++ b/docs/source/model_doc/bertgeneration.rst @@ -47,7 +47,7 @@ Usage: - Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g., -.. code-block:: +:: code-block # instantiate sentence fusion model sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse") diff --git a/docs/source/model_doc/blenderbot.rst b/docs/source/model_doc/blenderbot.rst index a1e08c7e284ef0..94988443f04dd0 100644 --- a/docs/source/model_doc/blenderbot.rst +++ b/docs/source/model_doc/blenderbot.rst @@ -28,9 +28,7 @@ Implementation Notes Usage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here is an example of model usage: - -.. 
code-block:: +Model Usage: >>> from transformers import BlenderbotSmallTokenizer, BlenderbotForConditionalGeneration >>> mname = 'facebook/blenderbot-90M' @@ -42,10 +40,7 @@ Here is an example of model usage: >>> print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in reply_ids]) -Here is how you can check out config values: - -.. code-block:: - +See Config Values: >>> from transformers import BlenderbotConfig >>> config_90 = BlenderbotConfig.from_pretrained("facebook/blenderbot-90M") diff --git a/docs/source/model_doc/gpt.rst b/docs/source/model_doc/gpt.rst index 40d6778850dcb9..5f945227a4b7f0 100644 --- a/docs/source/model_doc/gpt.rst +++ b/docs/source/model_doc/gpt.rst @@ -45,8 +45,6 @@ Note: If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install ``ftfy`` and ``SpaCy``:: -.. code-block:: bash - pip install spacy ftfy==4.4.3 python -m spacy download en diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py index 438b273bd4683d..f0ff2daa78f33e 100644 --- a/src/transformers/benchmark/benchmark_utils.py +++ b/src/transformers/benchmark/benchmark_utils.py @@ -1,7 +1,7 @@ -# This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp -# Copyright by the AllenNLP authors. """ Utilities for working with the local dataset cache. +This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp +Copyright by the AllenNLP authors. """ import copy diff --git a/src/transformers/commands/convert.py b/src/transformers/commands/convert.py index 3bc92a8649590e..4238f078d85a1a 100644 --- a/src/transformers/commands/convert.py +++ b/src/transformers/commands/convert.py @@ -8,8 +8,7 @@ def convert_command_factory(args: Namespace): """ Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint. - - Returns: ServeCommand + :return: ServeCommand """ return ConvertCommand( args.model_type, args.tf_checkpoint, args.pytorch_dump_output, args.config, args.finetuning_task_name @@ -27,9 +26,8 @@ class ConvertCommand(BaseTransformersCLICommand): def register_subcommand(parser: ArgumentParser): """ Register this command to argparse so it's available for the transformer-cli - - Args: - parser: Root parser to register command-specific arguments + :param parser: Root parser to register command-specific arguments + :return: """ train_parser = parser.add_parser( "convert", diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py index 7199aee9160eb7..d505efc961728b 100644 --- a/src/transformers/commands/serving.py +++ b/src/transformers/commands/serving.py @@ -31,8 +31,7 @@ def Body(*x, **y): def serve_command_factory(args: Namespace): """ Factory function used to instantiate serving server from provided command line arguments. - - Returns: ServeCommand + :return: ServeCommand """ nlp = pipeline( task=args.task, @@ -82,9 +81,8 @@ class ServeCommand(BaseTransformersCLICommand): def register_subcommand(parser: ArgumentParser): """ Register this command to argparse so it's available for the transformer-cli - - Args: - parser: Root parser to register command-specific arguments + :param parser: Root parser to register command-specific arguments + :return: """ serve_parser = parser.add_parser( "serve", help="CLI tool to run inference requests through REST and GraphQL endpoints." 
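
The convert, serve, and train hunks in this patch all reshape the same docstring pattern: a module-level factory turns the parsed ``argparse.Namespace`` into a command object, and a static ``register_subcommand`` hooks the command into the ``transformers-cli`` parser. For context, here is a minimal, self-contained sketch of that wiring using only the standard library; the ``EchoCommand`` class and its flags are invented for illustration and are not part of transformers:

```python
from argparse import ArgumentParser, Namespace


def echo_command_factory(args: Namespace):
    """Factory: build the command object from parsed CLI arguments."""
    return EchoCommand(args.message, args.repeat)


class EchoCommand:
    @staticmethod
    def register_subcommand(subparsers):
        # Mirror the pattern in the hunks above: add a subparser, declare
        # its flags, and point it at the factory via set_defaults.
        echo_parser = subparsers.add_parser("echo", help="Print a message (toy example).")
        echo_parser.add_argument("--message", type=str, required=True)
        echo_parser.add_argument("--repeat", type=int, default=1)
        echo_parser.set_defaults(func=echo_command_factory)

    def __init__(self, message: str, repeat: int):
        self.message = message
        self.repeat = repeat

    def run(self):
        for _ in range(self.repeat):
            print(self.message)


if __name__ == "__main__":
    parser = ArgumentParser("toy-cli")
    commands = parser.add_subparsers(dest="command", required=True)
    EchoCommand.register_subcommand(commands)
    args = parser.parse_args()
    args.func(args).run()
```

Running ``python toy_cli.py echo --message hi --repeat 2`` would print the message twice; the real CLI registers each command (convert, serve, train, ...) against the same root parser in exactly this shape.
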
diff --git a/src/transformers/commands/train.py b/src/transformers/commands/train.py index fa5b3f857a58f3..92299b4d8de396 100644 --- a/src/transformers/commands/train.py +++ b/src/transformers/commands/train.py @@ -19,8 +19,7 @@ def train_command_factory(args: Namespace): """ Factory function used to instantiate training command from provided command line arguments. - - Returns: TrainCommand + :return: TrainCommand """ return TrainCommand(args) @@ -30,9 +29,8 @@ class TrainCommand(BaseTransformersCLICommand): def register_subcommand(parser: ArgumentParser): """ Register this command to argparse so it's available for the transformer-cli - - Args: - parser: Root parser to register command-specific arguments + :param parser: Root parser to register command-specific arguments + :return: """ train_parser = parser.add_parser("train", help="CLI tool to train a model on a task.") diff --git a/src/transformers/commands/user.py b/src/transformers/commands/user.py index fa4f6dafd87dc8..820d2c1510c51e 100644 --- a/src/transformers/commands/user.py +++ b/src/transformers/commands/user.py @@ -70,7 +70,7 @@ def __init__(self, args): class LoginCommand(BaseUserCommand): def run(self): - print( # docstyle-ignore + print( """ _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| @@ -127,9 +127,8 @@ class ListObjsCommand(BaseUserCommand): def tabulate(self, rows: List[List[Union[str, int]]], headers: List[str]) -> str: """ Inspired by: - - - stackoverflow.com/a/8356620/593036 - - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data + stackoverflow.com/a/8356620/593036 + stackoverflow.com/questions/9535954/printing-lists-as-tabular-data """ col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)] row_format = ("{{:{}}} " * len(headers)).format(*col_widths) diff --git a/src/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py b/src/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py index d9b0926f4c918a..c451521a461b67 100644 --- a/src/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py +++ b/src/transformers/convert_bert_pytorch_checkpoint_to_original_tf.py @@ -28,19 +28,19 @@ def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name: str): """ - Args - model: BertModel Pytorch model instance to be converted - ckpt_dir: Tensorflow model directory - model_name: model name + :param model:BertModel Pytorch model instance to be converted + :param ckpt_dir: Tensorflow model directory + :param model_name: model name + :return: Currently supported HF models: - - Y BertModel - - N BertForMaskedLM - - N BertForPreTraining - - N BertForMultipleChoice - - N BertForNextSentencePrediction - - N BertForSequenceClassification - - N BertForQuestionAnswering + Y BertModel + N BertForMaskedLM + N BertForPreTraining + N BertForMultipleChoice + N BertForNextSentencePrediction + N BertForSequenceClassification + N BertForQuestionAnswering """ tensors_to_transpose = ("dense.weight", "attention.self.query", "attention.self.key", "attention.self.value") diff --git a/src/transformers/convert_marian_tatoeba_to_pytorch.py b/src/transformers/convert_marian_tatoeba_to_pytorch.py index 1e13f2a9a05e6b..88557e94add7a1 100644 --- a/src/transformers/convert_marian_tatoeba_to_pytorch.py +++ b/src/transformers/convert_marian_tatoeba_to_pytorch.py @@ -28,13 +28,11 @@ class TatoebaConverter: """Convert Tatoeba-Challenge models to huggingface format. - Steps: - - 1. 
convert numpy state dict to hf format (same code as OPUS-MT-Train conversion). - 2. rename opus model to huggingface format. This means replace each alpha3 code with an alpha2 code if a unique one existes. - e.g. aav-eng -> aav-en, heb-eng -> he-en - 3. write a model card containing the original Tatoeba-Challenge/README.md and extra info about alpha3 group members. + (1) convert numpy state dict to hf format (same code as OPUS-MT-Train conversion). + (2) rename opus model to huggingface format. This means replace each alpha3 code with an alpha2 code if a unique one existes. + e.g. aav-eng -> aav-en, heb-eng -> he-en + (3) write a model card containing the original Tatoeba-Challenge/README.md and extra info about alpha3 group members. """ def __init__(self, save_dir="marian_converted"): diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py index 6193b09acd1cde..d05061a7c02007 100644 --- a/src/transformers/data/data_collator.py +++ b/src/transformers/data/data_collator.py @@ -19,12 +19,14 @@ def default_data_collator(features: List[InputDataClass]) -> Dict[str, torch.Tensor]: """ - Very simple data collator that simply collates batches of dict-like objects and erforms special handling for potential keys named: - + Very simple data collator that: + - simply collates batches of dict-like objects + - Performs special handling for potential keys named: - ``label``: handles a single value (int or float) per object - ``label_ids``: handles a list of values per object + - does not do any additional preprocessing - Des not do any additional preprocessing: property names of the input object will be used as corresponding inputs to the model. + i.e., Property names of the input object will be used as corresponding inputs to the model. See glue and ner for example of how it's useful. """ @@ -423,7 +425,6 @@ def _tensorize_batch( def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ The masked tokens to be predicted for a particular sequence are determined by the following algorithm: - 0. Start from the beginning of the sequence by setting ``cur_len = 0`` (number of tokens processed so far). 1. Sample a ``span_length`` from the interval ``[1, max_span_length]`` (length of span of tokens to be masked) 2. Reserve a context of length ``context_length = span_length / plm_probability`` to surround span to be masked diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index cef794b5914829..642b6506b1de8a 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -289,7 +289,6 @@ def wrapper(*args, **kwargs): return wrapper -# docstyle-ignore DATASETS_IMPORT_ERROR = """ {0} requires the 🤗 Datasets library but it was not found in your enviromnent. You can install it with: ``` @@ -307,7 +306,6 @@ def wrapper(*args, **kwargs): """ -# docstyle-ignore TOKENIZERS_IMPORT_ERROR = """ {0} requires the 🤗 Tokenizers library but it was not found in your enviromnent. You can install it with: ``` @@ -320,7 +318,6 @@ def wrapper(*args, **kwargs): """ -# docstyle-ignore SENTENCEPIECE_IMPORT_ERROR = """ {0} requires the SentencePiece library but it was not found in your enviromnent. 
Checkout the instructions on the installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones @@ -328,7 +325,6 @@ def wrapper(*args, **kwargs): """ -# docstyle-ignore FAISS_IMPORT_ERROR = """ {0} requires the faiss library but it was not found in your enviromnent. Checkout the instructions on the installation page of its repo: https://github.com/facebookresearch/faiss/blob/master/INSTALL.md and follow the ones @@ -336,14 +332,12 @@ def wrapper(*args, **kwargs): """ -# docstyle-ignore PYTORCH_IMPORT_ERROR = """ {0} requires the PyTorch library but it was not found in your enviromnent. Checkout the instructions on the installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your enviromnent. """ -# docstyle-ignore SKLEARN_IMPORT_ERROR = """ {0} requires the scikit-learn library but it was not found in your enviromnent. You can install it with: ``` @@ -356,14 +350,12 @@ def wrapper(*args, **kwargs): """ -# docstyle-ignore TENSORFLOW_IMPORT_ERROR = """ {0} requires the TensorFlow library but it was not found in your enviromnent. Checkout the instructions on the installation page: https://www.tensorflow.org/install and follow the ones that match your enviromnent. """ -# docstyle-ignore FLAX_IMPORT_ERROR = """ {0} requires the FLAX library but it was not found in your enviromnent. Checkout the instructions on the installation page: https://github.com/google/flax and follow the ones that match your enviromnent. diff --git a/src/transformers/generation_tf_utils.py b/src/transformers/generation_tf_utils.py index dcf2f74cbbca48..6d2e056b667b9a 100644 --- a/src/transformers/generation_tf_utils.py +++ b/src/transformers/generation_tf_utils.py @@ -917,7 +917,7 @@ def _create_next_token_logits_penalties(input_ids, logits, repetition_penalty): def calc_banned_ngram_tokens(prev_input_ids, num_hypos, no_repeat_ngram_size, cur_len): - # Copied from fairseq for no_repeat_ngram in beam_search + # Copied from fairseq for no_repeat_ngram in beam_search""" if cur_len + 1 < no_repeat_ngram_size: # return no banned tokens if we haven't generated no_repeat_ngram_size tokens yet return [[] for _ in range(num_hypos)] diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index cc2d8ff2d06c90..5e9f7032ba355d 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -857,16 +857,16 @@ def forward( **kwargs, ): r""" - mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input): + mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input) Index of the classification token in each input sequence. Selected in the range ``[0, input_ids.size(-1) - 1[``. - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`) Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. 
you can set ``labels = input_ids`` Indices are selected in ``[-1, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` - mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`): + mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`) Labels for computing the multiple choice classification loss. Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see `input_ids` above) diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index 464e79b06836c3..5fd5469a202f52 100755 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -105,10 +105,8 @@ def create_position_ids_from_input_ids(input_ids, padding_idx): padding_idx+1. Padding symbols are ignored. This is modified from fairseq's `utils.make_positions`. - Args: - x: torch.Tensor x: - - Returns: torch.Tensor + :param torch.Tensor x: + :return torch.Tensor: """ # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA. mask = input_ids.ne(padding_idx).int() @@ -178,10 +176,8 @@ def create_position_ids_from_inputs_embeds(self, inputs_embeds): """We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids. - Args: - inputs_embeds: torch.Tensor inputs_embeds: - - Returns: torch.Tensor + :param torch.Tensor inputs_embeds: + :return torch.Tensor: """ input_shape = inputs_embeds.size()[:-1] sequence_length = input_shape[1] diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index cab63bc01e06c7..4f449c67a4806c 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -647,16 +647,16 @@ def forward( **kwargs ): r""" - mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input): + mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input) Index of the classification token in each input sequence. Selected in the range ``[0, input_ids.size(-1) - 1]``. - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`) Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``labels = input_ids`` Indices are selected in ``[-1, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` - mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`): + mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`) Labels for computing the multiple choice classification loss. Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see `input_ids` above) diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index 0bffcd00a9fd9f..3a39067070ae74 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -127,10 +127,8 @@ def create_position_ids_from_inputs_embeds(self, inputs_embeds): """We are provided embeddings directly. 
We cannot infer which are padded so just generate sequential position ids. - Args: - inputs_embeds: torch.Tensor - - Returns: torch.Tensor + :param torch.Tensor inputs_embeds: + :return torch.Tensor: """ input_shape = inputs_embeds.size()[:-1] sequence_length = input_shape[1] @@ -1328,10 +1326,8 @@ def create_position_ids_from_input_ids(input_ids, padding_idx): padding_idx+1. Padding symbols are ignored. This is modified from fairseq's `utils.make_positions`. - Args: - x: torch.Tensor x: - - Returns: torch.Tensor + :param torch.Tensor x: + :return torch.Tensor: """ # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA. mask = input_ids.ne(padding_idx).int() diff --git a/src/transformers/modeling_tf_gpt2.py b/src/transformers/modeling_tf_gpt2.py index 98537f6d864291..2f6a602aba2f28 100644 --- a/src/transformers/modeling_tf_gpt2.py +++ b/src/transformers/modeling_tf_gpt2.py @@ -704,7 +704,7 @@ def call( training=False, ): r""" - mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input): + mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input) Index of the classification token in each input sequence. Selected in the range ``[0, input_ids.size(-1) - 1[``. diff --git a/src/transformers/modeling_tf_longformer.py b/src/transformers/modeling_tf_longformer.py index 8766aef4389b40..9757a0add858de 100644 --- a/src/transformers/modeling_tf_longformer.py +++ b/src/transformers/modeling_tf_longformer.py @@ -166,11 +166,8 @@ def create_position_ids_from_input_ids(self, x): """Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols are ignored. This is modified from fairseq's `utils.make_positions`. - - Args: - x: tf.Tensor - - Returns: tf.Tensor + :param tf.Tensor x: + :return tf.Tensor: """ mask = tf.cast(tf.math.not_equal(x, self.padding_idx), dtype=tf.int32) incremental_indicies = tf.math.cumsum(mask, axis=1) * mask @@ -180,11 +177,8 @@ def create_position_ids_from_input_ids(self, x): def create_position_ids_from_inputs_embeds(self, inputs_embeds): """We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids. - - Args: - inputs_embeds: tf.Tensor - - Returns: tf.Tensor + :param tf.Tensor inputs_embeds: + :return tf.Tensor: """ seq_length = shape_list(inputs_embeds)[1] position_ids = tf.range(self.padding_idx + 1, seq_length + self.padding_idx + 1, dtype=tf.int32)[tf.newaxis, :] diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py index c11623f21e91bd..16a4cd080eb5ef 100644 --- a/src/transformers/modeling_tf_openai.py +++ b/src/transformers/modeling_tf_openai.py @@ -625,7 +625,7 @@ def call( training=False, ): r""" - mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input): + mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input) Index of the classification token in each input sequence. Selected in the range ``[0, input_ids.size(-1) - 1]``. 
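
The RoBERTa and Longformer hunks above (PyTorch and TF alike) all document the same fairseq-derived helper: non-padding tokens receive positions counting up from ``padding_idx + 1`` while padding positions stay at ``padding_idx``, computed with the masked-cumsum trick visible in the surrounding context lines. A minimal PyTorch sketch of that computation, written here for reference rather than taken verbatim from the patch:

```python
import torch


def create_position_ids_from_input_ids(input_ids: torch.Tensor, padding_idx: int) -> torch.Tensor:
    # 1 for real tokens, 0 for padding.
    mask = input_ids.ne(padding_idx).int()
    # The cumulative count of real tokens gives 1, 2, 3, ...; multiplying by
    # the mask zeroes those counts back out at padding positions.
    incremental_indices = torch.cumsum(mask, dim=1) * mask
    # Shift so positions start at padding_idx + 1 and padding stays at padding_idx.
    return incremental_indices.long() + padding_idx


input_ids = torch.tensor([[5, 7, 9, 1, 1]])  # assume padding_idx == 1
print(create_position_ids_from_input_ids(input_ids, padding_idx=1))
# tensor([[2, 3, 4, 1, 1]])
```

Because the cumulative sum only advances on real tokens, left- and right-padded batches both end up with contiguous position ids starting at ``padding_idx + 1``.
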
diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index f8a9a151c7f92a..a06163cdad8f12 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -111,11 +111,8 @@ def create_position_ids_from_input_ids(self, x): """Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols are ignored. This is modified from fairseq's `utils.make_positions`. - - Args: - x: tf.Tensor - - Returns: tf.Tensor + :param tf.Tensor x: + :return tf.Tensor: """ mask = tf.cast(tf.math.not_equal(x, self.padding_idx), dtype=tf.int32) incremental_indicies = tf.math.cumsum(mask, axis=1) * mask @@ -125,11 +122,8 @@ def create_position_ids_from_input_ids(self, x): def create_position_ids_from_inputs_embeds(self, inputs_embeds): """We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids. - - Args: - inputs_embeds: tf.Tensor - - Returns: tf.Tensor + :param tf.Tensor inputs_embeds: + :return tf.Tensor: """ seq_length = shape_list(inputs_embeds)[1] position_ids = tf.range(self.padding_idx + 1, seq_length + self.padding_idx + 1, dtype=tf.int32)[tf.newaxis, :] diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py index a62ee420e0dce2..b20e46599f90b1 100644 --- a/src/transformers/modeling_tf_xlnet.py +++ b/src/transformers/modeling_tf_xlnet.py @@ -1718,3 +1718,120 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) + + +# @add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of +# the hidden-states output to compute `span start logits` and `span end logits`). """, +# XLNET_START_DOCSTRING, XLNET_INPUTS_DOCSTRING) +# class TFXLNetForQuestionAnswering(TFXLNetPreTrainedModel): +# r""" +# Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs: +# **start_top_log_probs**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided) +# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top)`` +# Log probabilities for the top config.start_n_top start token possibilities (beam-search). +# **start_top_index**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided) +# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top)`` +# Indices for the top config.start_n_top start token possibilities (beam-search). +# **end_top_log_probs**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided) +# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top * config.end_n_top)`` +# Log probabilities for the top ``config.start_n_top * config.end_n_top`` end token possibilities (beam-search). +# **end_top_index**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided) +# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top * config.end_n_top)`` +# Indices for the top ``config.start_n_top * config.end_n_top`` end token possibilities (beam-search). +# **cls_logits**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided) +# ``tf.Tensor`` of shape ``(batch_size,)`` +# Log probabilities for the ``is_impossible`` label of the answers. 
+# **mems**: +# list of ``tf.Tensor`` (one for each layer): +# that contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model +# if config.mem_len > 0 else tuple of None. Can be used to speed up sequential decoding and attend to longer context. +# See details in the docstring of the `mems` input above. +# **hidden_states**: (`optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``) +# list of ``tf.Tensor`` (one for the output of each layer + the output of the embeddings) +# of shape ``(batch_size, sequence_length, hidden_size)``: +# Hidden-states of the model at the output of each layer plus the initial embedding outputs. +# **attentions**: (`optional`, returned when ``output_attentions=True``) +# list of ``tf.Tensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``: +# Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. + +# Examples:: + +# # For example purposes. Not runnable. +# tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') +# model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased') +# input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1 +# start_positions = tf.constant([1]) +# end_positions = tf.constant([3]) +# outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) +# loss, start_scores, end_scores = outputs[:2] + +# """ +# def __init__(self, config, *inputs, **kwargs): +# super().__init__(config, *inputs, **kwargs) +# self.start_n_top = config.start_n_top +# self.end_n_top = config.end_n_top + +# self.transformer = TFXLNetMainLayer(config, name='transformer') +# self.start_logits = TFPoolerStartLogits(config, name='start_logits') +# self.end_logits = TFPoolerEndLogits(config, name='end_logits') +# self.answer_class = TFPoolerAnswerClass(config, name='answer_class') + +# def call(self, inputs, training=False): +# transformer_outputs = self.transformer(inputs, training=training) +# hidden_states = transformer_outputs[0] +# start_logits = self.start_logits(hidden_states, p_mask=p_mask) + +# outputs = transformer_outputs[1:] # Keep mems, hidden states, attentions if there are in it + +# if start_positions is not None and end_positions is not None: +# # If we are on multi-GPU, let's remove the dimension added by batch splitting +# for x in (start_positions, end_positions, cls_index, is_impossible): +# if x is not None and x.dim() > 1: +# x.squeeze_(-1) + +# # during training, compute the end logits based on the ground truth of the start position +# end_logits = self.end_logits(hidden_states, start_positions=start_positions, p_mask=p_mask) + +# loss_fct = CrossEntropyLoss() +# start_loss = loss_fct(start_logits, start_positions) +# end_loss = loss_fct(end_logits, end_positions) +# total_loss = (start_loss + end_loss) / 2 + +# if cls_index is not None and is_impossible is not None: +# # Predict answerability from the representation of CLS and START +# cls_logits = self.answer_class(hidden_states, start_positions=start_positions, cls_index=cls_index) +# loss_fct_cls = nn.BCEWithLogitsLoss() +# cls_loss = loss_fct_cls(cls_logits, is_impossible) + +# # note(zhiliny): by default multiply the loss by 0.5 so that the scale is comparable to start_loss and end_loss +# total_loss += cls_loss * 0.5 + +# outputs = (total_loss,) + outputs + +# else: +# # during inference, compute 
the end logits based on beam search +# bsz, slen, hsz = hidden_states.size() +# start_log_probs = F.softmax(start_logits, dim=-1) # shape (bsz, slen) + +# start_top_log_probs, start_top_index = torch.topk(start_log_probs, self.start_n_top, dim=-1) # shape (bsz, start_n_top) +# start_top_index_exp = start_top_index.unsqueeze(-1).expand(-1, -1, hsz) # shape (bsz, start_n_top, hsz) +# start_states = torch.gather(hidden_states, -2, start_top_index_exp) # shape (bsz, start_n_top, hsz) +# start_states = start_states.unsqueeze(1).expand(-1, slen, -1, -1) # shape (bsz, slen, start_n_top, hsz) + +# hidden_states_expanded = hidden_states.unsqueeze(2).expand_as(start_states) # shape (bsz, slen, start_n_top, hsz) +# p_mask = p_mask.unsqueeze(-1) if p_mask is not None else None +# end_logits = self.end_logits(hidden_states_expanded, start_states=start_states, p_mask=p_mask) +# end_log_probs = F.softmax(end_logits, dim=1) # shape (bsz, slen, start_n_top) + +# end_top_log_probs, end_top_index = torch.topk(end_log_probs, self.end_n_top, dim=1) # shape (bsz, end_n_top, start_n_top) +# end_top_log_probs = end_top_log_probs.view(-1, self.start_n_top * self.end_n_top) +# end_top_index = end_top_index.view(-1, self.start_n_top * self.end_n_top) + +# start_states = torch.einsum("blh,bl->bh", hidden_states, start_log_probs) # get the representation of START as weighted sum of hidden states +# cls_logits = self.answer_class(hidden_states, start_states=start_states, cls_index=cls_index) # Shape (batch size,): one single `cls_logits` for each sample + +# outputs = (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits) + outputs + +# # return start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits +# # or (if labels are provided) (total_loss,) +# return outputs diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py index a8bdf89c080d9d..e226d747d7b3f9 100755 --- a/src/transformers/modeling_xlnet.py +++ b/src/transformers/modeling_xlnet.py @@ -1487,7 +1487,7 @@ def forward( return_dict=None, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`) Labels for computing the sequence classification/regression loss. Indices should be in ``[0, ..., config.num_labels - 1]``. 
If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss), diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index c561107fd27973..81b0c5a879e62b 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -352,22 +352,22 @@ class CaptureStd: - out - capture stdout: True/False, default True - err - capture stdout: True/False, default True - Examples:: + Examples: - with CaptureStdout() as cs: - print("Secret message") - print(f"captured: {cs.out}") + with CaptureStdout() as cs: + print("Secret message") + print(f"captured: {cs.out}") - import sys - with CaptureStderr() as cs: - print("Warning: ", file=sys.stderr) - print(f"captured: {cs.err}") + import sys + with CaptureStderr() as cs: + print("Warning: ", file=sys.stderr) + print(f"captured: {cs.err}") - # to capture just one of the streams, but not the other - with CaptureStd(err=False) as cs: - print("Secret message") - print(f"captured: {cs.out}") - # but best use the stream-specific subclasses + # to capture just one of the streams, but not the other + with CaptureStd(err=False) as cs: + print("Secret message") + print(f"captured: {cs.out}") + # but best use the stream-specific subclasses """ @@ -444,17 +444,17 @@ class CaptureLogger: Results: The captured output is available via `self.out` - Example:: + Example: - >>> from transformers import logging - >>> from transformers.testing_utils import CaptureLogger + >>> from transformers import logging + >>> from transformers.testing_utils import CaptureLogger - >>> msg = "Testing 1, 2, 3" - >>> logging.set_verbosity_info() - >>> logger = logging.get_logger("transformers.tokenization_bart") - >>> with CaptureLogger(logger) as cl: - ... logger.info(msg) - >>> assert cl.out, msg+"\n" + >>> msg = "Testing 1, 2, 3" + >>> logging.set_verbosity_info() + >>> logger = logging.get_logger("transformers.tokenization_bart") + >>> with CaptureLogger(logger) as cl: + ... logger.info(msg) + >>> assert cl.out, msg+"\n" """ def __init__(self, logger): @@ -485,36 +485,24 @@ class TestCasePlus(unittest.TestCase): of test, unless `after=False`. # 1. create a unique temp dir, `tmp_dir` will contain the path to the created temp dir - - :: - - def test_whatever(self): - tmp_dir = self.get_auto_remove_tmp_dir() + def test_whatever(self): + tmp_dir = self.get_auto_remove_tmp_dir() # 2. create a temp dir of my choice and delete it at the end - useful for debug when you want to # monitor a specific directory - - :: - - def test_whatever(self): - tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test") + def test_whatever(self): + tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test") # 3. create a temp dir of my choice and do not delete it at the end - useful for when you want # to look at the temp results - - :: - - def test_whatever(self): - tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", after=False) + def test_whatever(self): + tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", after=False) # 4. 
create a temp dir of my choice and ensure to delete it right away - useful for when you # disabled deletion in the previous test run and want to make sure the that tmp dir is empty # before the new test is run - - :: - - def test_whatever(self): - tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", before=True) + def test_whatever(self): + tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", before=True) Note 1: In order to run the equivalent of `rm -r` safely, only subdirs of the project repository checkout are allowed if an explicit `tmp_dir` is used, so diff --git a/src/transformers/tokenization_bertweet.py b/src/transformers/tokenization_bertweet.py index 66f3adcefa86be..1d28289f1b5550 100644 --- a/src/transformers/tokenization_bertweet.py +++ b/src/transformers/tokenization_bertweet.py @@ -488,7 +488,6 @@ def add_from_file(self, f): # This particular element is used in a couple ways, so we define it # with a name: -# docstyle-ignore EMOTICONS = r""" (?: [<>]? @@ -506,7 +505,7 @@ def add_from_file(self, f): # URL pattern due to John Gruber, modified by Tom Winzig. See # https://gist.github.com/winzig/8894715 -# docstyle-ignore + URLS = r""" # Capture 1: entire matched URL (?: https?: # URL protocol and colon @@ -550,7 +549,6 @@ def add_from_file(self, f): ) """ -# docstyle-ignore # The components of the tokenizer: REGEXPS = ( URLS, @@ -630,16 +628,18 @@ def _replace_html_entities(text, keep=(), remove_illegal=True, encoding="utf-8") Remove entities from text by converting them to their corresponding unicode character. - Args: - text: - A unicode string or a byte string encoded in the given `encoding` (which defaults to 'utf-8'). - keep (list): - List of entity names which should not be replaced. This supports both numeric entities (``&#nnnn;`` and ``&#hhhh;``) - and named entities (such as `` `` or ``>``). - remove_illegal (bool): - If `True`, entities that can't be converted are removed. Otherwise, entities that can't be converted are kept "as is". + :param text: a unicode string or a byte string encoded in the given + `encoding` (which defaults to 'utf-8'). + + :param list keep: list of entity names which should not be replaced.\ + This supports both numeric entities (``&#nnnn;`` and ``&#hhhh;``) + and named entities (such as `` `` or ``>``). + + :param bool remove_illegal: If `True`, entities that can't be converted are\ + removed. Otherwise, entities that can't be converted are kept "as + is". - Returns: A unicode string with the entities removed. + :returns: A unicode string with the entities removed. See https://github.com/scrapy/w3lib/blob/master/w3lib/html.py @@ -688,16 +688,16 @@ def _convert_entity(match): class TweetTokenizer: r""" - Examples:: + Tokenizer for tweets. - >>> # Tokenizer for tweets. >>> from nltk.tokenize import TweetTokenizer >>> tknzr = TweetTokenizer() >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--" >>> tknzr.tokenize(s0) ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--'] - >>> # Examples using `strip_handles` and `reduce_len parameters`: + Examples using `strip_handles` and `reduce_len parameters`: + >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True) >>> s1 = '@remy: This is waaaaayyyy too much for you!!!!!!' 
>>> tknzr.tokenize(s1) @@ -711,11 +711,10 @@ def __init__(self, preserve_case=True, reduce_len=False, strip_handles=False): def tokenize(self, text): """ - Args: - text: str - - Returns: list(str) - A tokenized list of strings; concatenating this list returns the original string if `preserve_case=False` + :param text: str + :rtype: list(str) + :return: a tokenized list of strings; concatenating this list returns\ + the original string if `preserve_case=False` """ # Fix HTML character entities: text = _replace_html_entities(text) diff --git a/src/transformers/tokenization_deberta.py b/src/transformers/tokenization_deberta.py index 015cbba33305fd..d4d29bc0ccfbe3 100644 --- a/src/transformers/tokenization_deberta.py +++ b/src/transformers/tokenization_deberta.py @@ -628,16 +628,13 @@ def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_spe def create_token_type_ids_from_sequences(self, token_ids_0, token_ids_1=None): """ - Create a mask from the two sequences passed to be used in a sequence-pair classification task. - A DeBERTa sequence pair mask has the following format: - - :: - - 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 - | first sequence | second sequence | - - If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s). + Creates a mask from the two sequences passed to be used in a sequence-pair classification task. + A BERT sequence pair mask has the following format: + 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence + if token_ids_1 is None, only returns the first portion of the mask (0's). + ~ Args: token_ids_0 (:obj:`List[int]`): List of IDs.
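
The section ends inside the DeBERTa ``create_token_type_ids_from_sequences`` docstring, but the mask layout both sides of the hunk describe is unambiguous: every position belonging to the first sequence (including its surrounding special tokens) gets a 0, and every position of the second sequence gets a 1. A short illustrative sketch of that layout; the special-token ids below are assumed placeholders, not DeBERTa's actual vocabulary:

```python
from typing import List, Optional


def create_token_type_ids_from_sequences(
    token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    cls, sep = [101], [102]  # assumed [CLS]/[SEP] ids, for illustration only
    # [CLS] + first sequence + [SEP] -> all zeros.
    if token_ids_1 is None:
        return len(cls + token_ids_0 + sep) * [0]
    # Second sequence plus its closing [SEP] -> all ones.
    return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]


print(create_token_type_ids_from_sequences([7, 8, 9], [4, 5]))
# [0, 0, 0, 0, 0, 1, 1, 1]
```

When ``token_ids_1`` is ``None``, only the first portion of the mask (all zeros) is returned, matching the single-sequence case described in the docstring.
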