
Fix PatchTSMixer Docstrings (huggingface#27943)
* docstring corrections

* style make

---------

Co-authored-by: vijaye12 <vijaye12@in.ibm.com>
2 people authored and iantbutler01 committed on Dec 16, 2023
1 parent e90ffaf commit 0af7c48
Showing 2 changed files with 58 additions and 66 deletions.
src/transformers/models/patchtsmixer/configuration_patchtsmixer.py
@@ -49,8 +49,6 @@ class PatchTSMixerConfig(PretrainedConfig):
non-overlapping patches.
num_parallel_samples (`int`, *optional*, defaults to 100):
The number of samples to generate in parallel for probabilistic forecast.
d_model (`int`, *optional*, defaults to 8):
Hidden dimension of the model. Recommended to set it as a multiple of patch_length (i.e. 2-5X of
patch_len). Larger value indicates more complex model.
@@ -96,8 +94,6 @@ class PatchTSMixerConfig(PretrainedConfig):
`PyTorch`. Setting it to `False` performs `PyTorch` weight initialization.
norm_eps (`float`, *optional*, defaults to 1e-05):
A value added to the denominator for numerical stability of normalization.
mask_type (`str`, *optional*, defaults to `"random"`):
Type of masking to use for Masked Pretraining mode. Allowed values are "random", "forecast". In Random
masking, points are masked randomly. In Forecast masking, points are masked towards the end.
@@ -116,9 +112,6 @@ class PatchTSMixerConfig(PretrainedConfig):
across channels.
unmasked_channel_indices (`list`, *optional*):
Channels that are not masked during pretraining.
head_dropout (`float`, *optional*, defaults to 0.2):
The dropout probability the `PatchTSMixer` head.
distribution_output (`string`, *optional*, defaults to `"student_t"`):
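To ground the configuration options touched above, here is a minimal sketch that builds a PatchTSMixerConfig using the parameters this diff discusses, plus the sequence-geometry settings (context_length, prediction_length, num_input_channels) needed to make the later snippets self-consistent; all values are illustrative, not recommendations.

from transformers import PatchTSMixerConfig

# Illustrative values only; every option not listed keeps its default.
config = PatchTSMixerConfig(
    context_length=512,               # length of the input window (sequence_length)
    prediction_length=96,             # forecast horizon (target_len)
    num_input_channels=7,             # number of time series channels
    d_model=16,                       # hidden size; recommended to be a multiple of patch_length
    mask_type="random",               # "random" or "forecast" masking for masked pretraining
    head_dropout=0.2,                 # dropout used in the PatchTSMixer head
    distribution_output="student_t",  # output distribution for probabilistic forecasting
    num_parallel_samples=100,         # samples drawn in parallel for probabilistic forecasts
    norm_eps=1e-5,                    # numerical-stability constant for normalization layers
)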
src/transformers/models/patchtsmixer/modeling_patchtsmixer.py (117 changes: 58 additions & 59 deletions)
@@ -1334,11 +1334,11 @@ def forward(
return_dict: Optional[bool] = None,
) -> PatchTSMixerModelOutput:
r"""
- observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
-     Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
-     in `[0, 1]`:
-     - 1 for values that are **observed**,
-     - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
+ observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
+     Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
+     in `[0, 1]`:
+     - 1 for values that are **observed**,
+     - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
Returns:
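As a quick illustration of the observed_mask convention documented above (1 = observed, 0 = missing), the mask can be derived from NaNs in the raw series before those NaNs are zero-filled; the tensor shapes simply follow the config sketched earlier and are otherwise arbitrary.

import torch

past_values = torch.randn(2, 512, 7)       # (batch_size, sequence_length, num_input_channels)
past_values[0, 100:110, 3] = float("nan")  # pretend a sensor dropped out for a stretch

observed_mask = (~torch.isnan(past_values)).float()  # 1.0 where observed, 0.0 where missing
past_values = torch.nan_to_num(past_values)          # NaNs replaced by zeros, as the docstring assumes

# `past_values` and `observed_mask` are then passed to the model's forward call under those keyword names.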
@@ -1446,13 +1446,13 @@ def forward(
return_dict: Optional[bool] = None,
) -> PatchTSMixerForPreTrainingOutput:
r"""
- observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
-     Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
-     in `[0, 1]`:
-     - 1 for values that are **observed**,
-     - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
- return_loss (`bool`, *optional*):
-     Whether to return the loss in the `forward` call.
+ observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
+     Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
+     in `[0, 1]`:
+     - 1 for values that are **observed**,
+     - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
+ return_loss (`bool`, *optional*):
+     Whether to return the loss in the `forward` call.
Returns:
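A sketch of how observed_mask and return_loss come together for masked pretraining, reusing the config and tensors from the snippets above; the head class name PatchTSMixerForPretraining is assumed here rather than taken from this diff.

from transformers import PatchTSMixerForPretraining

model = PatchTSMixerForPretraining(config)
outputs = model(
    past_values=past_values,
    observed_mask=observed_mask,  # optional; omit it to treat every point as observed
    return_loss=True,             # ask the forward call to also compute the pretraining loss
)
print(outputs.loss)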
@@ -1650,24 +1650,23 @@ def forward(
return_dict: Optional[bool] = None,
) -> PatchTSMixerForPredictionOutput:
r"""
- observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
-     Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
-     in `[0, 1]`:
-     - 1 for values that are **observed**,
-     - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
- future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,:
-     `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
-     values of the time series, that serve as labels for the model. The `future_values` is what the
-     Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
-     required for a pretraining task.
-     For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
-     to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
-     pass the target data with all channels, as channel Filtering for both prediction and target will be
-     manually applied before the loss computation.
- return_loss (`bool`, *optional*):
-     Whether to return the loss in the `forward` call.
+ observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
+     Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
+     in `[0, 1]`:
+     - 1 for values that are **observed**,
+     - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
+ future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,:
+     `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
+     values of the time series, that serve as labels for the model. The `future_values` is what the
+     Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
+     required for a pretraining task.
+     For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
+     to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
+     pass the target data with all channels, as channel Filtering for both prediction and target will be
+     manually applied before the loss computation.
+ return_loss (`bool`, *optional*):
+     Whether to return the loss in the `forward` call.
Returns:
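The channel-filtering note above is the easy part to get wrong: even when prediction_channel_indices restricts which channels are forecast, future_values must still contain all channels. A sketch assuming the forecasting head class PatchTSMixerForPrediction and the earlier config:

from transformers import PatchTSMixerForPrediction

model = PatchTSMixerForPrediction(config)  # config may also set prediction_channel_indices=[0, 3]
future_values = torch.randn(2, 96, 7)      # (batch_size, target_len, num_input_channels): keep all channels
outputs = model(
    past_values=past_values,
    observed_mask=observed_mask,
    future_values=future_values,  # training labels; channel filtering happens internally before the loss
    return_loss=True,
)
print(outputs.loss)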
@@ -1871,22 +1870,22 @@ def forward(
return_dict: Optional[bool] = None,
) -> PatchTSMixerForTimeSeriesClassificationOutput:
r"""
- future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
-     `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
-     values of the time series, that serve as labels for the model. The `future_values` is what the
-     Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
-     required for a pretraining task.
+ future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
+     `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
+     values of the time series, that serve as labels for the model. The `future_values` is what the
+     Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
+     required for a pretraining task.
-     For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
-     to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
-     pass the target data with all channels, as channel Filtering for both prediction and target will be
-     manually applied before the loss computation.
+     For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
+     to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
+     pass the target data with all channels, as channel Filtering for both prediction and target will be
+     manually applied before the loss computation.
-     For a classification task, it has a shape of `(batch_size,)`.
+     For a classification task, it has a shape of `(batch_size,)`.
-     For a regression task, it has a shape of `(batch_size, num_targets)`.
- return_loss (`bool`, *optional*):
-     Whether to return the loss in the `forward` call.
+     For a regression task, it has a shape of `(batch_size, num_targets)`.
+ return_loss (`bool`, *optional*):
+     Whether to return the loss in the `forward` call.
Returns:
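For the classification head, the targets described above collapse to one class id per series. A tiny sketch of what those labels look like; the keyword name follows this docstring, and num_classes is just an assumed example value.

import torch

num_classes = 5                               # assumed number of classes for illustration
labels = torch.randint(0, num_classes, (2,))  # classification targets: shape (batch_size,)

# These labels are passed together with `past_values` via the target keyword documented above,
# with `return_loss=True` if the classification loss should be returned from `forward`.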
@@ -2061,22 +2060,22 @@ def forward(
return_dict: Optional[bool] = None,
) -> PatchTSMixerForRegressionOutput:
r"""
- future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
-     `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
-     values of the time series, that serve as labels for the model. The `future_values` is what the
-     Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
-     required for a pretraining task.
-     For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
-     to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
-     pass the target data with all channels, as channel Filtering for both prediction and target will be
-     manually applied before the loss computation.
-     For a classification task, it has a shape of `(batch_size,)`.
-     For a regression task, it has a shape of `(batch_size, num_targets)`.
- return_loss (`bool`, *optional*):
-     Whether to return the loss in the `forward` call.
+ future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
+     `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
+     values of the time series, that serve as labels for the model. The `future_values` is what the
+     Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
+     required for a pretraining task.
+     For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
+     to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
+     pass the target data with all channels, as channel Filtering for both prediction and target will be
+     manually applied before the loss computation.
+     For a classification task, it has a shape of `(batch_size,)`.
+     For a regression task, it has a shape of `(batch_size, num_targets)`.
+ return_loss (`bool`, *optional*):
+     Whether to return the loss in the `forward` call.
Returns:
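The regression head mirrors this: targets are one row of continuous values per series, with shape (batch_size, num_targets). A small sketch, with num_targets assumed to be 3 for illustration:

import torch

targets = torch.randn(2, 3)  # regression targets: shape (batch_size, num_targets), here num_targets=3

# As with classification, these are supplied through the target keyword documented above
# (alongside `past_values`), and `return_loss=True` returns the regression loss from `forward`.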
