From 3538622ace799cf1d9a5dd279af90098965665c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kaan=20B=C4=B1=C3=A7akc=C4=B1?= <kaan.dvlpr@gmail.com>
Date: Mon, 13 Mar 2023 00:15:11 +0000
Subject: [PATCH] Fix docstring style.

---
 keras/backend.py | 30 ++++++++++++++----------
 keras/losses.py  | 61 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/keras/backend.py b/keras/backend.py
index d8b67592b40..d142bc8a9d1 100644
--- a/keras/backend.py
+++ b/keras/backend.py
@@ -5605,24 +5605,28 @@ def categorical_focal_crossentropy(
     parameter. When `gamma` = 0, there is no focal effect on the categorical
     crossentropy. And if alpha = 1, at the same time the loss is equivalent
     to the categorical crossentropy.
+
     Args:
-      target: A tensor with the same shape as `output`.
-      output: A tensor.
-      alpha: A weight balancing factor for all classes, default is `0.25` as
-             mentioned in the reference. It can be a list of floats or a scalar.
-             In the multi-class case, alpha may be set by inverse class
-             frequency by using `compute_class_weight` from `sklearn.utils`.
-      gamma: A focusing parameter, default is `2.0` as mentioned in the
-             reference. It helps to gradually reduce the importance given to
-             simple examples in a smooth manner.
-      from_logits: Whether `output` is expected to be a logits tensor. By
-        default, we consider that `output` encodes a probability distribution.
+        target: A tensor with the same shape as `output`.
+        output: A tensor.
+        alpha: A weight balancing factor for all classes, default is `0.25` as
+            mentioned in the reference. It can be a list of floats or a scalar.
+            In the multi-class case, alpha may be set by inverse class
+            frequency by using `compute_class_weight` from `sklearn.utils`.
+        gamma: A focusing parameter, default is `2.0` as mentioned in the
+            reference. It helps to gradually reduce the importance given to
+            simple examples in a smooth manner.
+        from_logits: Whether `output` is expected to be a logits tensor. By
+            default, we consider that `output` encodes a probability
+            distribution.
+
     Returns:
-      A tensor.
+        A tensor.
     """
     target = tf.convert_to_tensor(target)
     output = tf.convert_to_tensor(output)
     target.shape.assert_is_compatible_with(output.shape)
+
     output, from_logits = _get_logits(
         output, from_logits, "Softmax", "categorical_focal_crossentropy"
     )
@@ -5633,11 +5637,13 @@ def categorical_focal_crossentropy(
         lambda: output,
     )
 
+    # Scale preds so that the class probabilities of each sample sum to 1
     output = output / tf.reduce_sum(output, axis=axis, keepdims=True)
 
     epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
     output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_)
 
+    # Calculate cross entropy
     cce = -target * tf.math.log(output)
 
     # Calculate factors
diff --git a/keras/losses.py b/keras/losses.py
index 24f4a09de1c..9ca544499bc 100644
--- a/keras/losses.py
+++ b/keras/losses.py
@@ -945,29 +945,40 @@ class CategoricalFocalCrossentropy(LossFunctionWrapper):
     crossentropy. And if alpha = 1, at the same time the loss is equivalent to
     the categorical crossentropy.
 
+    Use this crossentropy loss function when there are two or more label
+    classes and you want to handle class imbalance without using
+    `class_weights`.
+    We expect labels to be provided in a `one_hot` representation.
+
     In the snippet below, there is `# classes` floating pointing values per
     example. The shape of both `y_pred` and `y_true` are
     `[batch_size, num_classes]`.
+
     Standalone usage:
+
     >>> y_true = [[0., 1., 0.], [0., 0., 1.]]
     >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
     >>> # Using 'auto'/'sum_over_batch_size' reduction type.
     >>> cce = tf.keras.losses.CategoricalFocalCrossentropy()
     >>> cce(y_true, y_pred).numpy()
     0.23315276
+
     >>> # Calling with 'sample_weight'.
     >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
     0.1632
+
     >>> # Using 'sum' reduction type.
     >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(
     ...     reduction=tf.keras.losses.Reduction.SUM)
     >>> cce(y_true, y_pred).numpy()
     0.46631
+
     >>> # Using 'none' reduction type.
     >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(
     ...     reduction=tf.keras.losses.Reduction.NONE)
     >>> cce(y_true, y_pred).numpy()
     array([3.2058331e-05, 4.6627346e-01], dtype=float32)
+
     Usage with the `compile()` API:
     ```python
     model.compile(optimizer='sgd',
@@ -975,12 +986,12 @@ class CategoricalFocalCrossentropy(LossFunctionWrapper):
     ```
     Args:
       alpha: A weight balancing factor for all classes, default is `0.25` as
-             mentioned in the reference. It can be a list of floats or a scalar.
-             In the multi-class case, alpha may be set by inverse class
-             frequency by using `compute_class_weight` from `sklearn.utils`.
+        mentioned in the reference. It can be a list of floats or a scalar.
+        In the multi-class case, alpha may be set by inverse class
+        frequency by using `compute_class_weight` from `sklearn.utils`.
       gamma: A focusing parameter, default is `2.0` as mentioned in the
-             reference. It helps to gradually reduce the importance given to
-             simple (easy) examples in a smooth manner.
+        reference. It helps to gradually reduce the importance given to
+        simple (easy) examples in a smooth manner.
       from_logits: Whether `output` is expected to be a logits tensor. By
         default, we consider that `output` encodes a probability distribution.
       label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
@@ -2154,6 +2165,7 @@ def categorical_focal_crossentropy(
     axis=-1,
 ):
     """Computes the categorical focal crossentropy loss.
+
     Standalone usage:
     >>> y_true = [[0, 1, 0], [0, 0, 1]]
     >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]]
@@ -2161,17 +2173,18 @@ def categorical_focal_crossentropy(
     >>> assert loss.shape == (2,)
     >>> loss.numpy()
     array([2.63401289e-04, 6.75912094e-01], dtype=float32)
+
     Args:
       y_true: Tensor of one-hot true targets.
       y_pred: Tensor of predicted targets.
       alpha: A weight balancing factor for all classes, default is `0.25` as
-         mentioned in the reference. It can be a list of floats or a scalar.
-         In the multi-class case, alpha may be set by inverse class frequency by
-         using `compute_class_weight` from `sklearn.utils`.
+        mentioned in the reference. It can be a list of floats or a scalar.
+        In the multi-class case, alpha may be set by inverse class frequency by
+        using `compute_class_weight` from `sklearn.utils`.
       gamma: A focusing parameter, default is `2.0` as mentioned in the
-         reference. It helps to gradually reduce the importance given to
-         simple examples in a smooth manner. When `gamma` = 0, there is no focal
-         effect on the categorical crossentropy.
+        reference. It helps to gradually reduce the importance given to
+        simple examples in a smooth manner. When `gamma` = 0, there is no focal
+        effect on the categorical crossentropy.
       from_logits: Whether `y_pred` is expected to be a logits tensor. By
         default, we assume that `y_pred` encodes a probability distribution.
       label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
@@ -2179,6 +2192,7 @@ def categorical_focal_crossentropy(
         and `0.9 + 0.1 / num_classes` for target labels.
       axis: Defaults to -1. The dimension along which the entropy is
         computed.
+
     Returns:
       Categorical focal crossentropy loss value.
     """
@@ -2240,21 +2254,24 @@ def _ragged_tensor_categorical_focal_crossentropy(
     number of elements independent of the batch. E.g. if the RaggedTensor
     has 2 batches with [2, 1] values respectively the resulting loss is
     the sum of the individual loss values divided by 3.
-    alpha: A weight balancing factor for all classes, default is `0.25` as
-         mentioned in the reference. It can be a list of floats or a scalar.
-         In the multi-class case, alpha may be set by inverse class frequency by
-         using `compute_class_weight` from `sklearn.utils`.
-    gamma: A focusing parameter, default is `2.0` as mentioned in the
-         reference. It helps to gradually reduce the importance given to
-         simple examples in a smooth manner. When `gamma` = 0, there is no focal
-         effect on the categorical crossentropy.
-    from_logits: Whether `y_pred` is expected to be a logits tensor. By
+
+    Args:
+      alpha: A weight balancing factor for all classes, default is `0.25` as
+        mentioned in the reference. It can be a list of floats or a scalar.
+        In the multi-class case, alpha may be set by inverse class frequency by
+        using `compute_class_weight` from `sklearn.utils`.
+      gamma: A focusing parameter, default is `2.0` as mentioned in the
+        reference. It helps to gradually reduce the importance given to
+        simple examples in a smooth manner. When `gamma` = 0, there is no focal
+        effect on the categorical crossentropy.
+      from_logits: Whether `y_pred` is expected to be a logits tensor. By
         default, we assume that `y_pred` encodes a probability distribution.
-    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
+      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
         example, if `0.1`, use `0.1 / num_classes` for non-target labels
         and `0.9 + 0.1 / num_classes` for target labels.
-    axis: Defaults to -1. The dimension along which the entropy is
+      axis: Defaults to -1. The dimension along which the entropy is
         computed.
+
     Returns:
       Categorical focal crossentropy loss value.
     """