From 2aec8c152bf8097a60442e7601b3cb748aca15bd Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Wed, 12 Apr 2023 20:36:36 -0400 Subject: [PATCH 1/2] [keras/layers/preprocessing/category_encoding.py,keras/layers/preprocessing/discretization.py,keras/layers/preprocessing/hashed_crossing.py,keras/layers/preprocessing/hashing.py,keras/layers/preprocessing/image_preprocessing.py,keras/layers/preprocessing/image_preprocessing_test.py,keras/layers/preprocessing/index_lookup.py,keras/layers/preprocessing/integer_lookup.py,keras/layers/preprocessing/normalization.py,keras/layers/preprocessing/string_lookup.py,keras/layers/preprocessing/text_vectorization.py] Standardise docstring usage of "Default to" --- .../layers/preprocessing/category_encoding.py | 3 ++- keras/layers/preprocessing/discretization.py | 7 +++--- keras/layers/preprocessing/hashed_crossing.py | 8 +++---- keras/layers/preprocessing/hashing.py | 18 +++++++-------- .../preprocessing/image_preprocessing.py | 22 +++++++++---------- .../preprocessing/image_preprocessing_test.py | 4 ++-- keras/layers/preprocessing/index_lookup.py | 11 +++++----- keras/layers/preprocessing/integer_lookup.py | 19 ++++++++-------- keras/layers/preprocessing/normalization.py | 3 ++- keras/layers/preprocessing/string_lookup.py | 15 +++++++------ .../preprocessing/text_vectorization.py | 8 +++---- 11 files changed, 62 insertions(+), 56 deletions(-) diff --git a/keras/layers/preprocessing/category_encoding.py b/keras/layers/preprocessing/category_encoding.py index 305caa0da42..5b606616f02 100644 --- a/keras/layers/preprocessing/category_encoding.py +++ b/keras/layers/preprocessing/category_encoding.py @@ -90,7 +90,7 @@ class CategoryEncoding(base_layer.Layer): inputs to the layer must integers in the range `0 <= value < num_tokens`, or an error will be thrown. output_mode: Specification for the output of the layer. - Defaults to `"multi_hot"`. 
Values can be `"one_hot"`, `"multi_hot"` or + Values can be `"one_hot"`, `"multi_hot"` or `"count"`, configuring the layer as follows: - `"one_hot"`: Encodes each individual element in the input into an array of `num_tokens` size, containing a 1 at the element index. If @@ -105,6 +105,7 @@ class CategoryEncoding(base_layer.Layer): - `"count"`: Like `"multi_hot"`, but the int array contains a count of the number of times the token at that index appeared in the sample. For all output modes, currently only output up to rank 2 is supported. + Defaults to `"multi_hot"`. sparse: Boolean. If true, returns a `SparseTensor` instead of a dense `Tensor`. Defaults to `False`. diff --git a/keras/layers/preprocessing/discretization.py b/keras/layers/preprocessing/discretization.py index a9693b99e70..72ae53c4e0a 100644 --- a/keras/layers/preprocessing/discretization.py +++ b/keras/layers/preprocessing/discretization.py @@ -164,8 +164,8 @@ class Discretization(base_preprocessing_layer.PreprocessingLayer): 0.01). Higher values of epsilon increase the quantile approximation, and hence result in more unequal buckets, but could improve performance and resource consumption. - output_mode: Specification for the output of the layer. Defaults to - `"int"`. Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, or `"count"` configuring the layer as follows: - `"int"`: Return the discretized bin indices directly. - `"one_hot"`: Encodes each individual element in the input into an @@ -180,9 +180,10 @@ class Discretization(base_preprocessing_layer.PreprocessingLayer): will be `(..., num_tokens)`. - `"count"`: As `"multi_hot"`, but the int array contains a count of the number of times the bin index appeared in the sample. + Defaults to `"int"`. sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, and `"count"` output modes. 
If True, returns a `SparseTensor` instead of - a dense `Tensor`. Defaults to False. + a dense `Tensor`. Defaults to `False`. Examples: diff --git a/keras/layers/preprocessing/hashed_crossing.py b/keras/layers/preprocessing/hashed_crossing.py index b64e0313261..86e0f58a5b5 100644 --- a/keras/layers/preprocessing/hashed_crossing.py +++ b/keras/layers/preprocessing/hashed_crossing.py @@ -51,15 +51,15 @@ class HashedCrossing(base_layer.Layer): Args: num_bins: Number of hash bins. - output_mode: Specification for the output of the layer. Defaults to - `"int"`. Values can be `"int"`, or `"one_hot"` configuring the layer as - follows: + output_mode: Specification for the output of the layer. Values can be + `"int"`, or `"one_hot"` configuring the layer as follows: - `"int"`: Return the integer bin indices directly. - `"one_hot"`: Encodes each individual element in the input into an array the same size as `num_bins`, containing a 1 at the input's bin index. + Defaults to `"int"`. sparse: Boolean. Only applicable to `"one_hot"` mode. If True, returns a - `SparseTensor` instead of a dense `Tensor`. Defaults to False. + `SparseTensor` instead of a dense `Tensor`. Defaults to `False`. **kwargs: Keyword arguments to construct a layer. Examples: diff --git a/keras/layers/preprocessing/hashing.py b/keras/layers/preprocessing/hashing.py index 84755929dd5..e64c0f34297 100644 --- a/keras/layers/preprocessing/hashing.py +++ b/keras/layers/preprocessing/hashing.py @@ -109,17 +109,16 @@ class Hashing(base_layer.Layer): bin, so the effective number of bins is `(num_bins - 1)` if `mask_value` is set. mask_value: A value that represents masked inputs, which are mapped to - index 0. Defaults to None, meaning no mask term will be added and the - hashing will start at index 0. + index 0. None means no mask term will be added and the + hashing will start at index 0. Defaults to `None`. salt: A single unsigned integer or None. 
If passed, the hash function used will be SipHash64, with these values used as an additional input (known as a "salt" in cryptography). - These should be non-zero. Defaults to `None` (in that - case, the FarmHash64 hash function is used). It also supports - tuple/list of 2 unsigned integer numbers, see reference paper for - details. - output_mode: Specification for the output of the layer. Defaults to - `"int"`. Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or + These should be non-zero. If None, uses the FarmHash64 hash function. + It also supports tuple/list of 2 unsigned integer numbers, see + reference paper for details. Defaults to `None`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, or `"count"` configuring the layer as follows: - `"int"`: Return the integer bin indices directly. - `"one_hot"`: Encodes each individual element in the input into an @@ -134,9 +133,10 @@ class Hashing(base_layer.Layer): will be `(..., num_tokens)`. - `"count"`: As `"multi_hot"`, but the int array contains a count of the number of times the bin index appeared in the sample. + Defaults to `"int"`. sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, and `"count"` output modes. If True, returns a `SparseTensor` instead of - a dense `Tensor`. Defaults to False. + a dense `Tensor`. Defaults to `False`. **kwargs: Keyword arguments to construct a layer. Input shape: diff --git a/keras/layers/preprocessing/image_preprocessing.py b/keras/layers/preprocessing/image_preprocessing.py index c81b3f6e3ae..cf3c8faa81e 100644 --- a/keras/layers/preprocessing/image_preprocessing.py +++ b/keras/layers/preprocessing/image_preprocessing.py @@ -65,9 +65,9 @@ class Resizing(base_layer.Layer): height: Integer, the height of the output shape. width: Integer, the width of the output shape. interpolation: String, the interpolation method. - Defaults to `"bilinear"`. 
Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + Defaults to `"bilinear"`. crop_to_aspect_ratio: If True, resize the images without aspect ratio distortion. When the original aspect ratio differs from the target aspect ratio, the output image will be @@ -420,9 +420,9 @@ class RandomFlip(base_layer.BaseRandomLayer): Args: mode: String indicating which flip mode to use. Can be `"horizontal"`, - `"vertical"`, or `"horizontal_and_vertical"`. Defaults to - `"horizontal_and_vertical"`. `"horizontal"` is a left-right flip and - `"vertical"` is a top-bottom flip. + `"vertical"`, or `"horizontal_and_vertical"`. `"horizontal"` is a + left-right flip and `"vertical"` is a top-bottom flip. Defaults to + `"horizontal_and_vertical"`. seed: Integer. Used to create a random seed. """ @@ -1055,9 +1055,9 @@ class RandomZoom(base_layer.BaseRandomLayer): result in an output zooming out between 20% to 30%. `width_factor=(-0.3, -0.2)` result in an - output zooming in between 20% to 30%. Defaults to `None`, + output zooming in between 20% to 30%. `None` means uniform zoom, i.e., zooming vertical and horizontal directions - by preserving the aspect ratio. + by preserving the aspect ratio. Defaults to `None`. fill_mode: Points outside the boundaries of the input are filled according to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). @@ -1377,9 +1377,9 @@ class RandomBrightness(base_layer.BaseRandomLayer): will be used for upper bound. value_range: Optional list/tuple of 2 floats for the lower and upper limit - of the values of the input data. Defaults to [0.0, 255.0]. - Can be changed to e.g. [0.0, 1.0] if the image input - has been scaled before this layer. + of the values of the input data. + Can be changed to e.g. [0.0, 1.0] if the image input + has been scaled before this layer. Defaults to [0.0, 255.0]. 
The brightness adjustment will be scaled to this range, and the output values will be clipped to this range. seed: optional integer, for fixed RNG behavior. @@ -1539,9 +1539,9 @@ class RandomHeight(base_layer.BaseRandomLayer): `factor=0.2` results in an output with height changed by a random amount in the range `[-20%, +20%]`. interpolation: String, the interpolation method. - Defaults to `"bilinear"`. Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + Defaults to `"bilinear"`. seed: Integer. Used to create a random seed. Input shape: @@ -1661,9 +1661,9 @@ class RandomWidth(base_layer.BaseRandomLayer): `factor=0.2` results in an output with width changed by a random amount in the range `[-20%, +20%]`. interpolation: String, the interpolation method. - Defaults to `bilinear`. Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. + Defaults to `"bilinear"`. seed: Integer. Used to create a random seed. 
Input shape: diff --git a/keras/layers/preprocessing/image_preprocessing_test.py b/keras/layers/preprocessing/image_preprocessing_test.py index 8c07ab131f5..8385e6cdace 100644 --- a/keras/layers/preprocessing/image_preprocessing_test.py +++ b/keras/layers/preprocessing/image_preprocessing_test.py @@ -2233,7 +2233,7 @@ def test_plain_call(self): layer = image_preprocessing.RandomWidth(0.5, seed=123) shape = (12, 12, 3) img = np.random.random((12,) + shape) - out = layer(img) # Default to training=True + out = layer(img) # Defaults to training=True self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) out = layer(img, training=True) @@ -2249,7 +2249,7 @@ def test_call_in_container(self): shape = (12, 12, 3) img = np.random.random((12,) + shape) - out = seq(img) # Default to training=True + out = seq(img) # Defaults to training=True self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape) out = seq(img, training=True) diff --git a/keras/layers/preprocessing/index_lookup.py b/keras/layers/preprocessing/index_lookup.py index c1c68ecf66a..4747b7ac206 100644 --- a/keras/layers/preprocessing/index_lookup.py +++ b/keras/layers/preprocessing/index_lookup.py @@ -134,10 +134,10 @@ class IndexLookup(base_preprocessing_layer.PreprocessingLayer): `"tf_idf"`, this argument must be supplied. invert: Only valid when `output_mode` is `"int"`. If True, this layer will map indices to vocabulary items instead of mapping vocabulary items to - indices. Default to False. - output_mode: Specification for the output of the layer. Defaults to - `"int"`. Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, - or `"tf_idf"` configuring the layer as follows: + indices. Defaults to `False`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + configuring the layer as follows: - `"int"`: Return the raw integer indices of the input tokens. 
- `"one_hot"`: Encodes each individual element in the input into an array the same size as the vocabulary, containing a 1 at the element @@ -153,6 +153,7 @@ class IndexLookup(base_preprocessing_layer.PreprocessingLayer): the number of times the token at that index appeared in the sample. - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to find the value in each token slot. + Defaults to `"int"`. pad_to_max_tokens: Only valid when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. If True, the output will have its feature axis padded to `max_tokens` even if the number of unique tokens in the @@ -161,7 +162,7 @@ class IndexLookup(base_preprocessing_layer.PreprocessingLayer): False. sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, `"count"` and `"tf-idf"` output modes. If True, returns a `SparseTensor` instead - of a dense `Tensor`. Defaults to False. + of a dense `Tensor`. Defaults to `False`. """ def __init__( diff --git a/keras/layers/preprocessing/integer_lookup.py b/keras/layers/preprocessing/integer_lookup.py index 8b250c3aabe..62b660a4884 100644 --- a/keras/layers/preprocessing/integer_lookup.py +++ b/keras/layers/preprocessing/integer_lookup.py @@ -71,18 +71,18 @@ class IntegerLookup(index_lookup.IndexLookup): only be specified when adapting the vocabulary or when setting `pad_to_max_tokens=True`. If None, there is no cap on the size of the vocabulary. Note that this size includes the OOV and mask tokens. - Defaults to None. + Defaults to `None`. num_oov_indices: The number of out-of-vocabulary tokens to use. If this value is more than 1, OOV inputs are modulated to determine their OOV value. If this value is 0, OOV inputs will cause an error when calling - the layer. Defaults to 1. + the layer. Defaults to `1`. mask_token: An integer token that represents masked inputs. When `output_mode` is `"int"`, the token is included in vocabulary and mapped to index 0. 
In other output modes, the token will not appear in the vocabulary and instances of the mask token in the input will be dropped. - If set to None, no mask term will be added. Defaults to None. + If set to None, no mask term will be added. Defaults to `None`. oov_token: Only used when `invert` is True. The token to return for OOV - indices. Defaults to -1. + indices. Defaults to `-1`. vocabulary: Optional. Either an array of integers or a string path to a text file. If passing an array, can pass a tuple, list, 1D numpy array, or 1D tensor containing the integer vocbulary terms. If passing a file @@ -98,10 +98,10 @@ class IntegerLookup(index_lookup.IndexLookup): `"tf_idf"`, this argument must be supplied. invert: Only valid when `output_mode` is `"int"`. If True, this layer will map indices to vocabulary items instead of mapping vocabulary items to - indices. Default to False. - output_mode: Specification for the output of the layer. Defaults to - `"int"`. Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, - or `"tf_idf"` configuring the layer as follows: + indices. Defaults to `False`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + configuring the layer as follows: - `"int"`: Return the vocabulary indices of the input tokens. - `"one_hot"`: Encodes each individual element in the input into an array the same size as the vocabulary, containing a 1 at the element @@ -119,6 +119,7 @@ class IntegerLookup(index_lookup.IndexLookup): find the value in each token slot. For `"int"` output, any shape of input and output is supported. For all other output modes, currently only output up to rank 2 is supported. + Defaults to `"int"`. pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. 
If True, the output will have its feature axis padded to `max_tokens` even if the number of unique tokens in the @@ -127,7 +128,7 @@ class IntegerLookup(index_lookup.IndexLookup): False. sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a - dense `Tensor`. Defaults to False. + dense `Tensor`. Defaults to `False`. Examples: diff --git a/keras/layers/preprocessing/normalization.py b/keras/layers/preprocessing/normalization.py index 2ff1bb1af0c..c105877d8d6 100644 --- a/keras/layers/preprocessing/normalization.py +++ b/keras/layers/preprocessing/normalization.py @@ -52,11 +52,12 @@ class Normalization(base_preprocessing_layer.PreprocessingLayer): example, if shape is `(None, 5)` and `axis=1`, the layer will track 5 separate mean and variance values for the last axis. If `axis` is set to `None`, the layer will normalize all elements in the input by a - scalar mean and variance. Defaults to -1, where the last axis of the + scalar mean and variance. When `-1` the last axis of the input is assumed to be a feature dimension and is normalized per index. Note that in the specific case of batched scalar inputs where the only axis is the batch axis, the default will normalize each index in the batch separately. In this case, consider passing `axis=None`. + Defaults to `-1`. mean: The mean value(s) to use during normalization. The passed value(s) will be broadcast to the shape of the kept axes above; if the value(s) cannot be broadcast, an error will be raised when this layer's diff --git a/keras/layers/preprocessing/string_lookup.py b/keras/layers/preprocessing/string_lookup.py index 4b16dca6f63..0b514c2d5cc 100644 --- a/keras/layers/preprocessing/string_lookup.py +++ b/keras/layers/preprocessing/string_lookup.py @@ -68,11 +68,11 @@ class StringLookup(index_lookup.IndexLookup): only be specified when adapting the vocabulary or when setting `pad_to_max_tokens=True`. 
If None, there is no cap on the size of the vocabulary. Note that this size includes the OOV and mask tokens. - Defaults to None. + Defaults to `None`. num_oov_indices: The number of out-of-vocabulary tokens to use. If this value is more than 1, OOV inputs are hashed to determine their OOV value. If this value is 0, OOV inputs will cause an error when calling - the layer. Defaults to 1. + the layer. Defaults to `1`. mask_token: A token that represents masked inputs. When `output_mode` is `"int"`, the token is included in vocabulary and mapped to index 0. In other output modes, the token will not appear in the vocabulary and @@ -93,10 +93,10 @@ class StringLookup(index_lookup.IndexLookup): `"tf_idf"`, this argument must be supplied. invert: Only valid when `output_mode` is `"int"`. If True, this layer will map indices to vocabulary items instead of mapping vocabulary items to - indices. Default to False. - output_mode: Specification for the output of the layer. Defaults to - `"int"`. Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, - or `"tf_idf"` configuring the layer as follows: + indices. Defaults to `False`. + output_mode: Specification for the output of the layer. Values can be + `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + configuring the layer as follows: - `"int"`: Return the raw integer indices of the input tokens. - `"one_hot"`: Encodes each individual element in the input into an array the same size as the vocabulary, containing a 1 at the element @@ -114,6 +114,7 @@ class StringLookup(index_lookup.IndexLookup): find the value in each token slot. For `"int"` output, any shape of input and output is supported. For all other output modes, currently only output up to rank 2 is supported. + Defaults to `"int"`. pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. 
If True, the output will have its feature axis padded to `max_tokens` even if the number of unique tokens in the @@ -122,7 +123,7 @@ class StringLookup(index_lookup.IndexLookup): False. sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a - dense `Tensor`. Defaults to False. + dense `Tensor`. Defaults to `False`. encoding: Optional. The text encoding to use to interpret the input strings. Defaults to `"utf-8"`. diff --git a/keras/layers/preprocessing/text_vectorization.py b/keras/layers/preprocessing/text_vectorization.py index a50beb2789c..89f14bc55f2 100644 --- a/keras/layers/preprocessing/text_vectorization.py +++ b/keras/layers/preprocessing/text_vectorization.py @@ -152,12 +152,12 @@ class TextVectorization(base_preprocessing_layer.PreprocessingLayer): have its time dimension padded or truncated to exactly `output_sequence_length` values, resulting in a tensor of shape `(batch_size, output_sequence_length)` regardless of how many tokens - resulted from the splitting step. Defaults to None. + resulted from the splitting step. Defaults to `None`. pad_to_max_tokens: Only valid in `"multi_hot"`, `"count"`, and `"tf_idf"` modes. If True, the output will have its feature axis padded to `max_tokens` even if the number of unique tokens in the vocabulary is less than max_tokens, resulting in a tensor of shape `(batch_size, - max_tokens)` regardless of vocabulary size. Defaults to False. + max_tokens)` regardless of vocabulary size. Defaults to `False`. vocabulary: Optional. Either an array of strings or a string path to a text file. If passing an array, can pass a tuple, list, 1D numpy array, or 1D tensor containing the string vocabulary terms. If passing a file @@ -171,10 +171,10 @@ class TextVectorization(base_preprocessing_layer.PreprocessingLayer): `"tf_idf"`, this argument must be supplied. ragged: Boolean. Only applicable to `"int"` output mode. 
If True, returns a `RaggedTensor` instead of a dense `Tensor`, where each sequence may - have a different length after string splitting. Defaults to False. + have a different length after string splitting. Defaults to `False`. sparse: Boolean. Only applicable to `"multi_hot"`, `"count"`, and `"tf_idf"` output modes. If True, returns a `SparseTensor` instead of a - dense `Tensor`. Defaults to False. + dense `Tensor`. Defaults to `False`. encoding: Optional. The text encoding to use to interpret the input strings. Defaults to `"utf-8"`. From a1925ecdd2c0ecf3fbdc101deb73b2625e007549 Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Sun, 23 Apr 2023 15:23:57 -0400 Subject: [PATCH 2/2] [keras/layers/preprocessing/hashing.py] Use backticks for defaults in docstrings --- keras/layers/preprocessing/hashing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras/layers/preprocessing/hashing.py b/keras/layers/preprocessing/hashing.py index e64c0f34297..77adfee68d0 100644 --- a/keras/layers/preprocessing/hashing.py +++ b/keras/layers/preprocessing/hashing.py @@ -109,12 +109,12 @@ class Hashing(base_layer.Layer): bin, so the effective number of bins is `(num_bins - 1)` if `mask_value` is set. mask_value: A value that represents masked inputs, which are mapped to - index 0. None means no mask term will be added and the + index 0. `None` means no mask term will be added and the hashing will start at index 0. Defaults to `None`. salt: A single unsigned integer or None. If passed, the hash function used will be SipHash64, with these values used as an additional input (known as a "salt" in cryptography). - These should be non-zero. If None, uses the FarmHash64 hash function. + These should be non-zero. If `None`, uses the FarmHash64 hash function. It also supports tuple/list of 2 unsigned integer numbers, see reference paper for details. Defaults to `None`. output_mode: Specification for the output of the layer. 
Values can be