From 5944635b1d192956a9b8224fe57466ca626ee3c4 Mon Sep 17 00:00:00 2001 From: Sachin Prasad Date: Wed, 20 Mar 2024 11:35:43 -0700 Subject: [PATCH] Standardize docstring (#1516) --- STYLE_GUIDE.md | 2 +- keras_nlp/layers/modeling/alibi_bias.py | 2 +- keras_nlp/layers/modeling/f_net_encoder.py | 2 +- keras_nlp/layers/modeling/masked_lm_head.py | 2 +- keras_nlp/layers/modeling/position_embedding.py | 2 +- keras_nlp/layers/modeling/reversible_embedding.py | 2 +- keras_nlp/layers/modeling/sine_position_encoding.py | 2 +- keras_nlp/layers/modeling/token_and_position_embedding.py | 2 +- keras_nlp/layers/modeling/transformer_decoder.py | 2 +- keras_nlp/layers/modeling/transformer_encoder.py | 2 +- keras_nlp/models/albert/albert_backbone.py | 2 +- keras_nlp/models/albert/albert_masked_lm.py | 2 +- keras_nlp/models/bert/bert_masked_lm.py | 2 +- keras_nlp/models/bloom/bloom_backbone.py | 2 +- keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py | 2 +- keras_nlp/models/distil_bert/distil_bert_masked_lm.py | 2 +- keras_nlp/models/electra/electra_backbone.py | 2 +- keras_nlp/models/f_net/f_net_masked_lm.py | 2 +- keras_nlp/models/gemma/gemma_backbone.py | 4 ++-- keras_nlp/models/gemma/gemma_causal_lm.py | 2 +- keras_nlp/models/preprocessor.py | 2 +- keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py | 2 +- keras_nlp/models/xlnet/xlnet_backbone.py | 2 +- keras_nlp/samplers/sampler.py | 2 +- keras_nlp/tokenizers/byte_pair_tokenizer.py | 4 ++-- keras_nlp/tokenizers/sentence_piece_tokenizer.py | 2 +- keras_nlp/tokenizers/tokenizer.py | 2 +- keras_nlp/tokenizers/word_piece_tokenizer.py | 2 +- 28 files changed, 30 insertions(+), 30 deletions(-) diff --git a/STYLE_GUIDE.md b/STYLE_GUIDE.md index 3db287de9..335f7ade9 100644 --- a/STYLE_GUIDE.md +++ b/STYLE_GUIDE.md @@ -116,7 +116,7 @@ class PositionEmbedding(keras.layers.Layer): Args: sequence_length: The maximum length of the dynamic sequence. - Examples: + Example: Direct call. >>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10) diff --git a/keras_nlp/layers/modeling/alibi_bias.py b/keras_nlp/layers/modeling/alibi_bias.py index cc72be3f8..c5f8706f9 100644 --- a/keras_nlp/layers/modeling/alibi_bias.py +++ b/keras_nlp/layers/modeling/alibi_bias.py @@ -43,7 +43,7 @@ class AlibiBias(keras.layers.Layer): multi-head attention layer of the transformer to add alibi bias to it. With shape `(batch_size, num_heads, query_length, key_length)`. - Examples: + Example: ```python query_length = 10 key_length = 10 diff --git a/keras_nlp/layers/modeling/f_net_encoder.py b/keras_nlp/layers/modeling/f_net_encoder.py index 0732dee34..919e3beb0 100644 --- a/keras_nlp/layers/modeling/f_net_encoder.py +++ b/keras_nlp/layers/modeling/f_net_encoder.py @@ -50,7 +50,7 @@ class FNetEncoder(keras.layers.Layer): **kwargs: other keyword arguments passed to `keras.layers.Layer`, including `name`, `trainable`, `dtype` etc. - Examples: + Example: ```python # Create a single FNet encoder layer. diff --git a/keras_nlp/layers/modeling/masked_lm_head.py b/keras_nlp/layers/modeling/masked_lm_head.py index d51f0eb50..9f9397cb7 100644 --- a/keras_nlp/layers/modeling/masked_lm_head.py +++ b/keras_nlp/layers/modeling/masked_lm_head.py @@ -62,7 +62,7 @@ class MaskedLMHead(keras.layers.Layer): **kwargs: other keyword arguments passed to `keras.layers.Layer`, including `name`, `trainable`, `dtype` etc. - Examples: + Example: ```python batch_size = 16 diff --git a/keras_nlp/layers/modeling/position_embedding.py b/keras_nlp/layers/modeling/position_embedding.py index 34597cb11..9f6b314b9 100644 --- a/keras_nlp/layers/modeling/position_embedding.py +++ b/keras_nlp/layers/modeling/position_embedding.py @@ -45,7 +45,7 @@ class PositionEmbedding(keras.layers.Layer): compute the position embedding from. This is useful during cached decoding, where each position is predicted separately in a loop. - Examples: + Example: Called directly on input. >>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10) diff --git a/keras_nlp/layers/modeling/reversible_embedding.py b/keras_nlp/layers/modeling/reversible_embedding.py index 9266b6d28..1fa5f5f90 100644 --- a/keras_nlp/layers/modeling/reversible_embedding.py +++ b/keras_nlp/layers/modeling/reversible_embedding.py @@ -61,7 +61,7 @@ class ReversibleEmbedding(keras.layers.Embedding): from `output_dim` to `input_dim`, instead of a normal embedding call. Default to `False`. - Examples: + Example: ```python batch_size = 16 vocab_size = 100 diff --git a/keras_nlp/layers/modeling/sine_position_encoding.py b/keras_nlp/layers/modeling/sine_position_encoding.py index 5ab874c11..b1cd7fbf4 100644 --- a/keras_nlp/layers/modeling/sine_position_encoding.py +++ b/keras_nlp/layers/modeling/sine_position_encoding.py @@ -44,7 +44,7 @@ class SinePositionEncoding(keras.layers.Layer): compute the encoding from. This is useful during cached decoding, where each position is predicted separately in a loop. - Examples: + Example: ```python # create a simple embedding layer with sinusoidal positional encoding seq_len = 100 diff --git a/keras_nlp/layers/modeling/token_and_position_embedding.py b/keras_nlp/layers/modeling/token_and_position_embedding.py index 6266963bf..8261cc7f3 100644 --- a/keras_nlp/layers/modeling/token_and_position_embedding.py +++ b/keras_nlp/layers/modeling/token_and_position_embedding.py @@ -49,7 +49,7 @@ class TokenAndPositionEmbedding(keras.layers.Layer): **kwargs: other keyword arguments passed to `keras.layers.Layer`, including `name`, `trainable`, `dtype` etc. - Examples: + Example: ```python inputs = np.ones(shape=(1, 50), dtype="int32") embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding( diff --git a/keras_nlp/layers/modeling/transformer_decoder.py b/keras_nlp/layers/modeling/transformer_decoder.py index 0de35da0b..b8f797f2e 100644 --- a/keras_nlp/layers/modeling/transformer_decoder.py +++ b/keras_nlp/layers/modeling/transformer_decoder.py @@ -72,7 +72,7 @@ class TransformerDecoder(keras.layers.Layer): **kwargs: other keyword arguments passed to `keras.layers.Layer`, including `name`, `trainable`, `dtype` etc. - Examples: + Example: ```python # Create a single transformer decoder layer. decoder = keras_nlp.layers.TransformerDecoder( diff --git a/keras_nlp/layers/modeling/transformer_encoder.py b/keras_nlp/layers/modeling/transformer_encoder.py index cd45b6aeb..20cec4ecf 100644 --- a/keras_nlp/layers/modeling/transformer_encoder.py +++ b/keras_nlp/layers/modeling/transformer_encoder.py @@ -61,7 +61,7 @@ class TransformerEncoder(keras.layers.Layer): **kwargs: other keyword arguments passed to `keras.layers.Layer`, including `name`, `trainable`, `dtype` etc. - Examples: + Example: ```python # Create a single transformer encoder layer. diff --git a/keras_nlp/models/albert/albert_backbone.py b/keras_nlp/models/albert/albert_backbone.py index 09053ff89..0cc1d4d02 100644 --- a/keras_nlp/models/albert/albert_backbone.py +++ b/keras_nlp/models/albert/albert_backbone.py @@ -77,7 +77,7 @@ class AlbertBackbone(Backbone): such as softmax and layer normalization, will always be done at float32 precision regardless of dtype. - Examples: + Example: ```python input_data = { "token_ids": np.ones(shape=(1, 12), dtype="int32"), diff --git a/keras_nlp/models/albert/albert_masked_lm.py b/keras_nlp/models/albert/albert_masked_lm.py index 1958713b9..e421ef524 100644 --- a/keras_nlp/models/albert/albert_masked_lm.py +++ b/keras_nlp/models/albert/albert_masked_lm.py @@ -52,7 +52,7 @@ class AlbertMaskedLM(Task): `None`. If `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: Raw string data. ```python diff --git a/keras_nlp/models/bert/bert_masked_lm.py b/keras_nlp/models/bert/bert_masked_lm.py index 17b966961..b915a9948 100644 --- a/keras_nlp/models/bert/bert_masked_lm.py +++ b/keras_nlp/models/bert/bert_masked_lm.py @@ -51,7 +51,7 @@ class BertMaskedLM(Task): `None`. If `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: Raw string data. ```python diff --git a/keras_nlp/models/bloom/bloom_backbone.py b/keras_nlp/models/bloom/bloom_backbone.py index 9b7c65a39..eb686668d 100644 --- a/keras_nlp/models/bloom/bloom_backbone.py +++ b/keras_nlp/models/bloom/bloom_backbone.py @@ -58,7 +58,7 @@ class BloomBackbone(Backbone): such as softmax and layer normalization, will always be done at float32 precision regardless of dtype. - Examples: + Example: ```python input_data = { "token_ids": np.ones(shape=(1, 12), dtype="int32"), diff --git a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py index d050dde6c..a794c3437 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py @@ -55,7 +55,7 @@ class DebertaV3MaskedLM(Task): `None`. If `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: Raw string data. ```python diff --git a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py index fcf54e014..d99234a04 100644 --- a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py +++ b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py @@ -55,7 +55,7 @@ class DistilBertMaskedLM(Task): `None`. If `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: Raw string data. ```python diff --git a/keras_nlp/models/electra/electra_backbone.py b/keras_nlp/models/electra/electra_backbone.py index f4f2a23b6..a116caa20 100644 --- a/keras_nlp/models/electra/electra_backbone.py +++ b/keras_nlp/models/electra/electra_backbone.py @@ -63,7 +63,7 @@ class ElectraBackbone(Backbone): such as softmax and layer normalization, will always be done at float32 precision regardless of dtype. - Examples: + Example: ```python input_data = { "token_ids": np.ones(shape=(1, 12), dtype="int32"), diff --git a/keras_nlp/models/f_net/f_net_masked_lm.py b/keras_nlp/models/f_net/f_net_masked_lm.py index c715a7084..4a0ec5e25 100644 --- a/keras_nlp/models/f_net/f_net_masked_lm.py +++ b/keras_nlp/models/f_net/f_net_masked_lm.py @@ -51,7 +51,7 @@ class FNetMaskedLM(Task): `None`. If `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: Raw string data. ```python diff --git a/keras_nlp/models/gemma/gemma_backbone.py b/keras_nlp/models/gemma/gemma_backbone.py index 06f5b0f60..8e4bac126 100644 --- a/keras_nlp/models/gemma/gemma_backbone.py +++ b/keras_nlp/models/gemma/gemma_backbone.py @@ -60,7 +60,7 @@ class GemmaBackbone(Backbone): computations, such as softmax and layer normalization will always be done a float32 precision regardless of dtype. - Example usage: + Example: ```python input_data = { "token_ids": np.ones(shape=(1, 12), dtype="int32"), @@ -205,7 +205,7 @@ def get_layout_map( backbone weights, so that you can use it to distribute weights across the accelerators. - Sample usage: + Example: ``` # Feel free to change the mesh shape to balance data and model parallel mesh = keras.distribution.DeviceMesh( diff --git a/keras_nlp/models/gemma/gemma_causal_lm.py b/keras_nlp/models/gemma/gemma_causal_lm.py index 45c7c6abe..58e2e302d 100644 --- a/keras_nlp/models/gemma/gemma_causal_lm.py +++ b/keras_nlp/models/gemma/gemma_causal_lm.py @@ -359,7 +359,7 @@ def score( [batch_size, num_tokens, vocab_size] in "logits" mode, or [batch_size, num_tokens] in "loss" mode. - Examples: + Example: Compute gradients between embeddings and loss scores with TensorFlow: ```python diff --git a/keras_nlp/models/preprocessor.py b/keras_nlp/models/preprocessor.py index 16a65e57c..031a884e1 100644 --- a/keras_nlp/models/preprocessor.py +++ b/keras_nlp/models/preprocessor.py @@ -75,7 +75,7 @@ def from_preset( Args: preset: string. Must be one of "{{preset_names}}". - Examples: + Example: ```python # Load a preprocessor layer from a preset. preprocessor = keras_nlp.models.{{preprocessor_name}}.from_preset( diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py b/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py index e231f3dc7..e6b5a45bb 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py @@ -53,7 +53,7 @@ class XLMRobertaMaskedLM(Task): `None`. If `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: Raw string inputs and pretrained backbone. ```python diff --git a/keras_nlp/models/xlnet/xlnet_backbone.py b/keras_nlp/models/xlnet/xlnet_backbone.py index 45be1f74e..03ea607d9 100644 --- a/keras_nlp/models/xlnet/xlnet_backbone.py +++ b/keras_nlp/models/xlnet/xlnet_backbone.py @@ -65,7 +65,7 @@ class XLNetBackbone(Backbone): padding_mask: Mask to avoid performing attention on padding token indices of shape `[batch_size, sequence_length]`. - Examples: + Example: ```python import numpy as np from keras_nlp.models import XLNetBackbone diff --git a/keras_nlp/samplers/sampler.py b/keras_nlp/samplers/sampler.py index 3ecf16ac2..a6b64b532 100644 --- a/keras_nlp/samplers/sampler.py +++ b/keras_nlp/samplers/sampler.py @@ -36,7 +36,7 @@ class Sampler: computes the next token based on a probability distribution over all possible vocab entries. - Examples: + Example: ```python causal_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_base_en") diff --git a/keras_nlp/tokenizers/byte_pair_tokenizer.py b/keras_nlp/tokenizers/byte_pair_tokenizer.py index 2ac8832a7..a8dbc5136 100644 --- a/keras_nlp/tokenizers/byte_pair_tokenizer.py +++ b/keras_nlp/tokenizers/byte_pair_tokenizer.py @@ -156,7 +156,7 @@ class BytePairTokenizerCache(tf.Module): The cache key is string tensor or python strings, and the value is split tokens joined by whitespace. For example, "dragonfly" => "dragon fly" - Examples: + Example: ``` cache = BytePairTokenizerCache() cache.insert(["butterfly", "dragonfly"], ["but ter fly", "dragon fly"]) @@ -665,7 +665,7 @@ def from_preset( Args: preset: string. Must be one of "{{preset_names}}". - Examples: + Example: ```python # Load a preset tokenizer. tokenizer = {{model_name}}.from_preset("{{example_preset_name}}") diff --git a/keras_nlp/tokenizers/sentence_piece_tokenizer.py b/keras_nlp/tokenizers/sentence_piece_tokenizer.py index 64e169939..20a73d6af 100644 --- a/keras_nlp/tokenizers/sentence_piece_tokenizer.py +++ b/keras_nlp/tokenizers/sentence_piece_tokenizer.py @@ -275,7 +275,7 @@ def from_preset( Args: preset: string. Must be one of "{{preset_names}}". - Examples: + Example: ```python # Load a preset tokenizer. tokenizer = {{model_name}}.from_preset("{{example_preset_name}}") diff --git a/keras_nlp/tokenizers/tokenizer.py b/keras_nlp/tokenizers/tokenizer.py index 7da1e9d7b..4c26e4524 100644 --- a/keras_nlp/tokenizers/tokenizer.py +++ b/keras_nlp/tokenizers/tokenizer.py @@ -40,7 +40,7 @@ class Tokenizer(PreprocessingLayer): "vocab free" tokenizers, such as a whitespace splitter show below, these methods do not apply and can be skipped. - Examples: + Example: ```python class WhitespaceSplitterTokenizer(keras_nlp.tokenizers.Tokenizer): diff --git a/keras_nlp/tokenizers/word_piece_tokenizer.py b/keras_nlp/tokenizers/word_piece_tokenizer.py index 75f956899..c203d50c7 100644 --- a/keras_nlp/tokenizers/word_piece_tokenizer.py +++ b/keras_nlp/tokenizers/word_piece_tokenizer.py @@ -480,7 +480,7 @@ def from_preset( Args: preset: string. Must be one of "{{preset_names}}". - Examples: + Example: ```python # Load a preset tokenizer. tokenizer = {{model_name}}.from_preset("{{example_preset_name}}")