From 1e7750076dc9073b0c89474789a882b977d6f6a7 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Thu, 2 Nov 2023 18:16:49 +0100
Subject: [PATCH 1/7] feat: added warning about usage of past covariates during
 auto-regression

---
 darts/models/forecasting/ensemble_model.py     |  2 ++
 darts/models/forecasting/forecasting_model.py  | 18 ++++++++++++++++++
 darts/models/forecasting/regression_model.py   |  2 ++
 .../forecasting/torch_forecasting_model.py     |  2 ++
 4 files changed, 24 insertions(+)

diff --git a/darts/models/forecasting/ensemble_model.py b/darts/models/forecasting/ensemble_model.py
index 65e0595faf..b772594d1e 100644
--- a/darts/models/forecasting/ensemble_model.py
+++ b/darts/models/forecasting/ensemble_model.py
@@ -288,6 +288,7 @@ def predict(
         num_samples: int = 1,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         # ensure forecasting models all rely on the same series during inference
         if series is None:
@@ -305,6 +306,7 @@ def predict(
             num_samples=num_samples,
             verbose=verbose,
             predict_likelihood_parameters=predict_likelihood_parameters,
+            show_warnings=show_warnings,
         )
 
         # for single-level ensemble, probabilistic forecast is obtained directly from forecasting models
diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index 1d3bf14eac..42810c6cd4 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -2131,6 +2131,7 @@ def predict(
         num_samples: int = 1,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         """Forecasts values for `n` time steps after the end of the series.
 
@@ -2169,6 +2170,8 @@ def predict(
             If set to `True`, the model predict the parameters of its Likelihood parameters instead of the target. Only
             supported for probabilistic models with a likelihood, `num_samples = 1` and `n<=output_chunk_length`.
             Default: ``False``
+        show_warnings
+            Whether to show warnings related to auto-regression and past covariates usage.
 
         Returns
         -------
@@ -2210,6 +2213,19 @@ def predict(
                     "must be embedded in the target `series` passed to `predict()`."
                 )
             )
+        if (
+            show_warnings
+            and self.uses_past_covariates
+            and self.output_chunk_length is not None
+            and n > self.output_chunk_length
+        ):
+            logger.warning(
+                "Since `predict` was called with `n > output_chunk_length`, auto-regression is be used to forecast "
+                "the values after `output_chunk_length`. As this model uses past covariates, it will access future "
+                "values (compared to the first predicted timestep) of this covariates to produce each subsequent "
+                "`output_chunk_length` forecasts."
+                "To hide this warning, set `show_warnings=False`."
+            )
 
     def _predict_wrapper(
         self,
@@ -2220,6 +2236,7 @@ def _predict_wrapper(
         num_samples: int,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         kwargs = dict()
         if self.supports_likelihood_parameter_prediction:
@@ -2231,6 +2248,7 @@ def _predict_wrapper(
             future_covariates=future_covariates,
             num_samples=num_samples,
             verbose=verbose,
+            show_warnings=show_warnings,
             **kwargs,
         )
 
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 68a7b08d38..075adb9e5c 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -737,6 +737,7 @@ def predict(
         num_samples: int = 1,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
         **kwargs,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         """Forecasts values for `n` time steps after the end of the series.
@@ -812,6 +813,7 @@ def predict(
             num_samples,
             verbose,
             predict_likelihood_parameters,
+            show_warnings,
         )
 
         # check that the input sizes of the target series and covariates match
diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py
index 7cfb9cd5cb..88287d0642 100644
--- a/darts/models/forecasting/torch_forecasting_model.py
+++ b/darts/models/forecasting/torch_forecasting_model.py
@@ -1225,6 +1225,7 @@ def predict(
         num_loader_workers: int = 0,
         mc_dropout: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         """Predict the ``n`` time step following the end of the training series, or of the specified ``series``.
 
@@ -1344,6 +1345,7 @@ def predict(
             future_covariates,
             num_samples=num_samples,
             predict_likelihood_parameters=predict_likelihood_parameters,
+            show_warnings=show_warnings,
         )
 
         dataset = self._build_inference_dataset(

From 71ab596cb382a3c3d1e4f24ea28a28ce0d148764 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 3 Nov 2023 11:24:43 +0100
Subject: [PATCH 2/7] feat: slightly changed the docstring for
 output_chunk_length

---
 darts/models/forecasting/catboost_model.py          | 3 ++-
 darts/models/forecasting/lgbm.py                    | 3 ++-
 darts/models/forecasting/linear_regression_model.py | 3 ++-
 darts/models/forecasting/random_forest.py           | 3 ++-
 darts/models/forecasting/regression_model.py        | 3 ++-
 darts/models/forecasting/xgboost.py                 | 3 ++-
 6 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/darts/models/forecasting/catboost_model.py b/darts/models/forecasting/catboost_model.py
index b25b983a77..e5eee8908f 100644
--- a/darts/models/forecasting/catboost_model.py
+++ b/darts/models/forecasting/catboost_model.py
@@ -53,7 +53,8 @@ def __init__(
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
+            future values of the past covariates should not be accessed.
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/lgbm.py b/darts/models/forecasting/lgbm.py
index f5ca44e288..ec909bf13c 100644
--- a/darts/models/forecasting/lgbm.py
+++ b/darts/models/forecasting/lgbm.py
@@ -80,7 +80,8 @@ def __init__(
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
+            future values of the past covariates should not be accessed.
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py
index ed6ac9ba25..68100346fd 100644
--- a/darts/models/forecasting/linear_regression_model.py
+++ b/darts/models/forecasting/linear_regression_model.py
@@ -73,7 +73,8 @@ def __init__(
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
+            future values of the past covariates should not be accessed.
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/random_forest.py b/darts/models/forecasting/random_forest.py
index a5d91448ed..9c9b3d924b 100644
--- a/darts/models/forecasting/random_forest.py
+++ b/darts/models/forecasting/random_forest.py
@@ -77,7 +77,8 @@ def __init__(
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
+            future values of the past covariates should not be accessed.
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 075adb9e5c..7ea8599fe6 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -1500,7 +1500,8 @@ def __init__(
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
+            future values of the past covariates should not be accessed.
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/xgboost.py b/darts/models/forecasting/xgboost.py
index 99e2df3ff9..a2fd1c8f6a 100644
--- a/darts/models/forecasting/xgboost.py
+++ b/darts/models/forecasting/xgboost.py
@@ -95,7 +95,8 @@ def __init__(
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
             horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
+            future values of the past covariates should not be accessed.
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that

From 42f54d1c26db2687daee2a5c8fc931185b1df710 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Fri, 3 Nov 2023 12:07:37 +0100
Subject: [PATCH 3/7] fix: forgot to add the new argument to some wrapper

---
 darts/models/forecasting/forecasting_model.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index 42810c6cd4..d161d76f49 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -328,6 +328,7 @@ def _predict_wrapper(
         num_samples: int,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> TimeSeries:
         kwargs = dict()
         if self.supports_likelihood_parameter_prediction:
@@ -838,6 +839,9 @@ def retrain_func(
         else:
             outer_iterator = _build_tqdm_iterator(series, verbose)
 
+        # deactivate the warning after displaying it once
+        show_predict_warnings = show_warnings
+
         forecasts_list = []
         for idx, series_ in enumerate(outer_iterator):
             past_covariates_ = past_covariates[idx] if past_covariates else None
@@ -1019,7 +1023,10 @@ def retrain_func(
                     num_samples=num_samples,
                     verbose=verbose,
                     predict_likelihood_parameters=predict_likelihood_parameters,
+                    show_warnings=show_predict_warnings,
                 )
+                show_predict_warnings = False
+
                 if forecast_components is None:
                     forecast_components = forecast.columns
 
@@ -2483,6 +2490,7 @@ def _predict_wrapper(
         num_samples: int,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> TimeSeries:
         kwargs = dict()
         if self.supports_likelihood_parameter_prediction:
@@ -2700,6 +2708,7 @@ def _predict_wrapper(
         num_samples: int,
         verbose: bool = False,
         predict_likelihood_parameters: bool = False,
+        show_warnings: bool = True,
     ) -> TimeSeries:
         kwargs = dict()
         if self.supports_likelihood_parameter_prediction:

From 8c82f060d7db11441d275530fa9994441fc5b2b7 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Tue, 7 Nov 2023 17:37:47 +0100
Subject: [PATCH 4/7] feat: display autoregression and past cov related warning
 only once when using historical forecast with a global model

---
 darts/models/forecasting/forecasting_model.py                | 5 ++---
 .../optimized_historical_forecasts_torch.py                  | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index d161d76f49..dc6c27e197 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -839,7 +839,7 @@ def retrain_func(
         else:
             outer_iterator = _build_tqdm_iterator(series, verbose)
 
-        # deactivate the warning after displaying it once
+        # deactivate the warning after displaying it once if show_warnings is True
         show_predict_warnings = show_warnings
 
         forecasts_list = []
@@ -2230,8 +2230,7 @@ def predict(
                 "Since `predict` was called with `n > output_chunk_length`, auto-regression is be used to forecast "
                 "the values after `output_chunk_length`. As this model uses past covariates, it will access future "
                 "values (compared to the first predicted timestep) of this covariates to produce each subsequent "
-                "`output_chunk_length` forecasts."
-                "To hide this warning, set `show_warnings=False`."
+                "`output_chunk_length` forecasts. To hide this warning, set `show_warnings=False`."
             )
 
     def _predict_wrapper(
diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py
index 085c5efa64..d063d66241 100644
--- a/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py
+++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_torch.py
@@ -99,6 +99,7 @@ def _optimized_historical_forecasts(
         future_covariates,
         num_samples=num_samples,
         predict_likelihood_parameters=predict_likelihood_parameters,
+        show_warnings=show_warnings,
     )
 
     dataset = model._build_inference_dataset(

From 5652a71c0e04c97f2d2f54d47cb8c28301f6ffa3 Mon Sep 17 00:00:00 2001
From: madtoinou <32447896+madtoinou@users.noreply.github.com>
Date: Fri, 10 Nov 2023 14:34:48 +0100
Subject: [PATCH 5/7] Apply suggestions from code review

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 darts/models/forecasting/forecasting_model.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py
index dc6c27e197..6de1ffaa3f 100644
--- a/darts/models/forecasting/forecasting_model.py
+++ b/darts/models/forecasting/forecasting_model.py
@@ -2227,10 +2227,10 @@ def predict(
             and n > self.output_chunk_length
         ):
             logger.warning(
-                "Since `predict` was called with `n > output_chunk_length`, auto-regression is be used to forecast "
-                "the values after `output_chunk_length`. As this model uses past covariates, it will access future "
-                "values (compared to the first predicted timestep) of this covariates to produce each subsequent "
-                "`output_chunk_length` forecasts. To hide this warning, set `show_warnings=False`."
+                "`predict()` was called with `n > output_chunk_length`: using auto-regression to forecast "
+                "the values after `output_chunk_length` points. The model will access `(n - output_chunk_length)` "
+                "future values of your `past_covariates` (relative to the first predicted time step). "
+                "To hide this warning, set `show_warnings=False`."
             )
 
     def _predict_wrapper(

From 200ae8766386ca1a71ce81365b0e71c98df22ee4 Mon Sep 17 00:00:00 2001
From: madtoinou <antoine.madrona@unit8.co>
Date: Wed, 15 Nov 2023 19:40:22 +0100
Subject: [PATCH 6/7] feat: updated docstring for output_chunk_length in both
 regression and torch models, updated docstring about past/future covariates
 requirements in the torch models that were missing it

---
 darts/models/forecasting/block_rnn_model.py         |  5 ++++-
 darts/models/forecasting/catboost_model.py          |  6 +++---
 darts/models/forecasting/dlinear.py                 | 10 +++++++---
 darts/models/forecasting/lgbm.py                    |  6 +++---
 darts/models/forecasting/linear_regression_model.py |  6 +++---
 darts/models/forecasting/nbeats.py                  |  5 ++++-
 darts/models/forecasting/nhits.py                   |  5 ++++-
 darts/models/forecasting/nlinear.py                 | 10 +++++++---
 darts/models/forecasting/pl_forecasting_module.py   |  5 ++++-
 darts/models/forecasting/random_forest.py           |  6 +++---
 darts/models/forecasting/regression_model.py        |  5 +++--
 darts/models/forecasting/tcn_model.py               |  5 ++++-
 darts/models/forecasting/tft_model.py               |  3 ++-
 darts/models/forecasting/tide_model.py              | 10 ++++++++--
 darts/models/forecasting/transformer_model.py       |  7 +++++--
 darts/models/forecasting/xgboost.py                 |  6 +++---
 16 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/darts/models/forecasting/block_rnn_model.py b/darts/models/forecasting/block_rnn_model.py
index 5903adf9aa..78f7dc492b 100644
--- a/darts/models/forecasting/block_rnn_model.py
+++ b/darts/models/forecasting/block_rnn_model.py
@@ -159,7 +159,10 @@ def __init__(
         input_chunk_length
             The number of time steps that will be fed to the internal forecasting module
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         model
             Either a string specifying the RNN module type ("RNN", "LSTM" or "GRU"),
             or a PyTorch module with the same specifications as
diff --git a/darts/models/forecasting/catboost_model.py b/darts/models/forecasting/catboost_model.py
index 8de72b42fb..3f3a2fcfe7 100644
--- a/darts/models/forecasting/catboost_model.py
+++ b/darts/models/forecasting/catboost_model.py
@@ -52,9 +52,9 @@ def __init__(
             of integers with lags is required.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
-            future values of the past covariates should not be accessed.
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/dlinear.py b/darts/models/forecasting/dlinear.py
index 62f41ee621..064c98c508 100644
--- a/darts/models/forecasting/dlinear.py
+++ b/darts/models/forecasting/dlinear.py
@@ -245,15 +245,19 @@ def __init__(
     ):
         """An implementation of the DLinear model, as presented in [1]_.
 
-        This implementation is improved by allowing the optional use of past covariates,
-        future covariates and static covariates, and by making the model optionally probabilistic.
+        This implementation is improved by allowing the optional use of past covariates (known for
+        ``input_chunk_length`` points before prediction time), future covariates (known for ``output_chunk_length``
+        points after prediction time) and static covariates, as well as supporting probabilistic forecasting.
 
         Parameters
         ----------
         input_chunk_length
             The length of the input sequence fed to the model.
         output_chunk_length
-            The length of the forecast of the model.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         shared_weights
             Whether to use shared weights for all components of multivariate series.
 
diff --git a/darts/models/forecasting/lgbm.py b/darts/models/forecasting/lgbm.py
index d45e6bcde7..23a72a9cb3 100644
--- a/darts/models/forecasting/lgbm.py
+++ b/darts/models/forecasting/lgbm.py
@@ -79,9 +79,9 @@ def __init__(
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
-            future values of the past covariates should not be accessed.
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py
index e725ed132c..fa9403b07b 100644
--- a/darts/models/forecasting/linear_regression_model.py
+++ b/darts/models/forecasting/linear_regression_model.py
@@ -72,9 +72,9 @@ def __init__(
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
-            future values of the past covariates should not be accessed.
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/nbeats.py b/darts/models/forecasting/nbeats.py
index 24e789a201..df7b50b846 100644
--- a/darts/models/forecasting/nbeats.py
+++ b/darts/models/forecasting/nbeats.py
@@ -565,7 +565,10 @@ def __init__(
         input_chunk_length
             The length of the input sequence fed to the model.
         output_chunk_length
-            The length of the forecast of the model.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         generic_architecture
             Boolean value indicating whether the generic architecture of N-BEATS is used.
             If not, the interpretable architecture outlined in the paper (consisting of one trend
diff --git a/darts/models/forecasting/nhits.py b/darts/models/forecasting/nhits.py
index 69ba11eee3..3167f01139 100644
--- a/darts/models/forecasting/nhits.py
+++ b/darts/models/forecasting/nhits.py
@@ -502,7 +502,10 @@ def __init__(
         input_chunk_length
             The length of the input sequence fed to the model.
         output_chunk_length
-            The length of the forecast of the model.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         num_stacks
             The number of stacks that make up the whole model.
         num_blocks
diff --git a/darts/models/forecasting/nlinear.py b/darts/models/forecasting/nlinear.py
index 09e112f6f9..993d5bfe15 100644
--- a/darts/models/forecasting/nlinear.py
+++ b/darts/models/forecasting/nlinear.py
@@ -191,15 +191,19 @@ def __init__(
     ):
         """An implementation of the NLinear model, as presented in [1]_.
 
-        This implementation is improved by allowing the optional use of past covariates,
-        future covariates and static covariates, and by making the model optionally probabilistic.
+        This implementation is improved by allowing the optional use of past covariates (known for
+        ``input_chunk_length`` points before prediction time), future covariates (known for ``output_chunk_length``
+        points after prediction time) and static covariates, as well as supporting probabilistic forecasting.
 
         Parameters
         ----------
         input_chunk_length
             The length of the input sequence fed to the model.
         output_chunk_length
-            The length of the forecast of the model.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         shared_weights
             Whether to use shared weights for all components of multivariate series.
 
diff --git a/darts/models/forecasting/pl_forecasting_module.py b/darts/models/forecasting/pl_forecasting_module.py
index 79c1902a81..2a98cabafd 100644
--- a/darts/models/forecasting/pl_forecasting_module.py
+++ b/darts/models/forecasting/pl_forecasting_module.py
@@ -98,7 +98,10 @@ def __init__(
         input_chunk_length
             Number of input past time steps per chunk.
         output_chunk_length
-            Number of output time steps per chunk.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         train_sample_shape
             Shape of the model's input, used to instantiate model without calling ``fit_from_dataset`` and
             perform sanity check on new training/inference datasets used for re-training or prediction.
diff --git a/darts/models/forecasting/random_forest.py b/darts/models/forecasting/random_forest.py
index 6ceb360a04..f6e288a1f8 100644
--- a/darts/models/forecasting/random_forest.py
+++ b/darts/models/forecasting/random_forest.py
@@ -76,9 +76,9 @@ def __init__(
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
-            future values of the past covariates should not be accessed.
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index d4ecd20a52..aa596625b2 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -115,8 +115,9 @@ def __init__(
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful if the covariates don't extend far enough into the future.
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/tcn_model.py b/darts/models/forecasting/tcn_model.py
index 076fd939df..a9e0af317e 100644
--- a/darts/models/forecasting/tcn_model.py
+++ b/darts/models/forecasting/tcn_model.py
@@ -281,7 +281,10 @@ def __init__(
         input_chunk_length
             Number of past time steps that are fed to the forecasting module.
         output_chunk_length
-            Number of time steps the torch module will predict into the future at once.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         kernel_size
             The size of every kernel in a convolutional layer.
         num_filters
diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py
index f77255c601..fc1c6a20aa 100644
--- a/darts/models/forecasting/tft_model.py
+++ b/darts/models/forecasting/tft_model.py
@@ -684,7 +684,8 @@ def __init__(
         <https://pytorch-forecasting.readthedocs.io/en/latest/models.html>`_ implementation.
 
         This model supports mixed covariates (includes past covariates known for ``input_chunk_length``
-        points before prediction time and future covariates known for ``output_chunk_length`` after prediction time).
+        points before prediction time and future covariates known for ``output_chunk_length`` points
+        after prediction time).
 
         The TFT applies multi-head attention queries on future inputs from mandatory ``future_covariates``.
         Specifying future encoders with ``add_encoders`` (read below) can automatically generate future covariates
diff --git a/darts/models/forecasting/tide_model.py b/darts/models/forecasting/tide_model.py
index 57704e21dd..4dccbe530d 100644
--- a/darts/models/forecasting/tide_model.py
+++ b/darts/models/forecasting/tide_model.py
@@ -385,10 +385,13 @@ def __init__(
 
         The model is implemented as a :class:`MixedCovariatesTorchModel`, which means that it supports
         both past and future covariates, as well as static covariates. Probabilistic forecasting is supported through
-        the use of a `likelihood` instead of a `loss_fn`.
+        the use of a ``likelihood`` instead of a ``loss_fn``.
         The original paper does not describe how past covariates are treated in detail, so we assume that they are
         passed to the encoder as-is.
 
+        The past covariates must be known for ``input_chunk_length`` points before prediction time whereas the future
+        covariates must be known for ``output_chunk_length`` points after prediction time.
+
         The encoder and decoder are implemented as a series of residual blocks. The number of residual blocks in
         the encoder and decoder can be controlled via ``num_encoder_layers`` and ``num_decoder_layers`` respectively.
         The width of the layers in the residual blocks can be controlled via ``hidden_size``. Similarly, the width
@@ -399,7 +402,10 @@ def __init__(
         input_chunk_length
             The length of the input sequence fed to the model.
         output_chunk_length
-            The length of the forecast of the model.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         num_encoder_layers
             The number of residual blocks in the encoder.
         num_decoder_layers
diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py
index 6430b1d108..a5b56e8b32 100644
--- a/darts/models/forecasting/transformer_model.py
+++ b/darts/models/forecasting/transformer_model.py
@@ -351,14 +351,17 @@ def __init__(
 
         The transformer architecture implemented here is based on [1]_.
 
-        This model supports past covariates (known for `input_chunk_length` points before prediction time).
+        This model supports past covariates (known for ``input_chunk_length`` points before prediction time).
 
         Parameters
         ----------
         input_chunk_length
             Number of time steps to be input to the forecasting module.
         output_chunk_length
-            Number of time steps to be output by the forecasting module.
+            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         d_model
             The number of expected features in the transformer encoder/decoder inputs (default=64).
         nhead
diff --git a/darts/models/forecasting/xgboost.py b/darts/models/forecasting/xgboost.py
index 9b4b1fda76..0d0b532810 100644
--- a/darts/models/forecasting/xgboost.py
+++ b/darts/models/forecasting/xgboost.py
@@ -98,9 +98,9 @@ def __init__(
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
             Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
-            future values of the past covariates should not be accessed.
+            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
+            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past covariates for prediction (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that

From 7d649dc87382c44f2a3c7e63ccea808228e28320 Mon Sep 17 00:00:00 2001
From: dennisbader <dennis.bader@gmx.ch>
Date: Thu, 16 Nov 2023 13:33:21 +0100
Subject: [PATCH 7/7] update docs

---
 darts/models/forecasting/block_rnn_model.py   | 14 +++++++----
 darts/models/forecasting/catboost_model.py    | 10 +++++---
 darts/models/forecasting/dlinear.py           | 16 +++++++-----
 darts/models/forecasting/lgbm.py              | 10 +++++---
 .../forecasting/linear_regression_model.py    | 10 +++++---
 darts/models/forecasting/nbeats.py            | 14 +++++++----
 darts/models/forecasting/nhits.py             | 14 +++++++----
 darts/models/forecasting/nlinear.py           | 16 +++++++-----
 .../forecasting/pl_forecasting_module.py      | 14 +++++++----
 darts/models/forecasting/random_forest.py     | 10 +++++---
 darts/models/forecasting/regression_model.py  | 20 +++++++++------
 darts/models/forecasting/tcn_model.py         | 14 +++++++----
 darts/models/forecasting/tft_model.py         | 19 ++++++++++----
 darts/models/forecasting/tide_model.py        | 25 +++++++++----------
 darts/models/forecasting/transformer_model.py | 16 +++++++-----
 darts/models/forecasting/xgboost.py           | 10 +++++---
 .../models/forecasting/test_backtesting.py    |  3 ++-
 17 files changed, 145 insertions(+), 90 deletions(-)

diff --git a/darts/models/forecasting/block_rnn_model.py b/darts/models/forecasting/block_rnn_model.py
index 78f7dc492b..afd3cbf503 100644
--- a/darts/models/forecasting/block_rnn_model.py
+++ b/darts/models/forecasting/block_rnn_model.py
@@ -157,12 +157,16 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            The number of time steps that will be fed to the internal forecasting module
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         model
             Either a string specifying the RNN module type ("RNN", "LSTM" or "GRU"),
             or a PyTorch module with the same specifications as
diff --git a/darts/models/forecasting/catboost_model.py b/darts/models/forecasting/catboost_model.py
index 3f3a2fcfe7..fbb8e3df7d 100644
--- a/darts/models/forecasting/catboost_model.py
+++ b/darts/models/forecasting/catboost_model.py
@@ -51,10 +51,12 @@ def __init__(
             `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
             of integers with lags is required.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the forecast
+            horizon `n` used in `predict()`, which is the desired number of prediction points generated using a
+            one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents auto-regression. This is
+            useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past and/or future covariates for prediction (depending on the model's covariate
+            support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/dlinear.py b/darts/models/forecasting/dlinear.py
index 064c98c508..05faff0350 100644
--- a/darts/models/forecasting/dlinear.py
+++ b/darts/models/forecasting/dlinear.py
@@ -246,18 +246,22 @@ def __init__(
         """An implementation of the DLinear model, as presented in [1]_.
 
         This implementation is improved by allowing the optional use of past covariates (known for
-        `output_chunk_length` points before prediction time), future covariates (known for ``output_chunk_length``
+        `input_chunk_length` points before prediction time), future covariates (known for `output_chunk_length`
         points after prediction time) and static covariates, as well as supporting probabilistic forecasting.
 
         Parameters
         ----------
         input_chunk_length
-            The length of the input sequence fed to the model.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         shared_weights
             Whether to use shared weights for all components of multivariate series.
 
diff --git a/darts/models/forecasting/lgbm.py b/darts/models/forecasting/lgbm.py
index 23a72a9cb3..4a3d748719 100644
--- a/darts/models/forecasting/lgbm.py
+++ b/darts/models/forecasting/lgbm.py
@@ -78,10 +78,12 @@ def __init__(
             'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the forecast
+            horizon `n` used in `predict()`, which is the desired number of prediction points generated using a
+            one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents auto-regression. This is
+            useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past and/or future covariates for prediction (depending on the model's covariate
+            support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py
index fa9403b07b..24e7aec049 100644
--- a/darts/models/forecasting/linear_regression_model.py
+++ b/darts/models/forecasting/linear_regression_model.py
@@ -71,10 +71,12 @@ def __init__(
             'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the forecast
+            horizon `n` used in `predict()`, which is the desired number of prediction points generated using a
+            one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents auto-regression. This is
+            useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past and/or future covariates for prediction (depending on the model's covariate
+            support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/nbeats.py b/darts/models/forecasting/nbeats.py
index df7b50b846..76daceac7a 100644
--- a/darts/models/forecasting/nbeats.py
+++ b/darts/models/forecasting/nbeats.py
@@ -563,12 +563,16 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            The length of the input sequence fed to the model.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         generic_architecture
             Boolean value indicating whether the generic architecture of N-BEATS is used.
             If not, the interpretable architecture outlined in the paper (consisting of one trend
diff --git a/darts/models/forecasting/nhits.py b/darts/models/forecasting/nhits.py
index 3167f01139..94adc5f0ef 100644
--- a/darts/models/forecasting/nhits.py
+++ b/darts/models/forecasting/nhits.py
@@ -500,12 +500,16 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            The length of the input sequence fed to the model.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         num_stacks
             The number of stacks that make up the whole model.
         num_blocks
diff --git a/darts/models/forecasting/nlinear.py b/darts/models/forecasting/nlinear.py
index 993d5bfe15..2120e32de9 100644
--- a/darts/models/forecasting/nlinear.py
+++ b/darts/models/forecasting/nlinear.py
@@ -192,18 +192,22 @@ def __init__(
         """An implementation of the NLinear model, as presented in [1]_.
 
         This implementation is improved by allowing the optional use of past covariates (known for
-        ``output_chunk_length`` points before prediction time), future covariates (known for ``output_chunk_length``
+        `input_chunk_length` points before prediction time), future covariates (known for `output_chunk_length`
         points after prediction time) and static covariates, as well as supporting probabilistic forecasting.
 
         Parameters
         ----------
         input_chunk_length
-            The length of the input sequence fed to the model.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         shared_weights
             Whether to use shared weights for all components of multivariate series.
 
diff --git a/darts/models/forecasting/pl_forecasting_module.py b/darts/models/forecasting/pl_forecasting_module.py
index 2a98cabafd..c58ffd57e3 100644
--- a/darts/models/forecasting/pl_forecasting_module.py
+++ b/darts/models/forecasting/pl_forecasting_module.py
@@ -96,12 +96,16 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            Number of input past time steps per chunk.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         train_sample_shape
             Shape of the model's input, used to instantiate model without calling ``fit_from_dataset`` and
             perform sanity check on new training/inference datasets used for re-training or prediction.
diff --git a/darts/models/forecasting/random_forest.py b/darts/models/forecasting/random_forest.py
index f6e288a1f8..34cee5f38f 100644
--- a/darts/models/forecasting/random_forest.py
+++ b/darts/models/forecasting/random_forest.py
@@ -75,10 +75,12 @@ def __init__(
             'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the forecast
+            horizon `n` used in `predict()`, which is the desired number of prediction points generated using a
+            one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents auto-regression. This is
+            useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past and/or future covariates for prediction (depending on the model's covariate
+            support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py
index 1670849518..3058aa87ea 100644
--- a/darts/models/forecasting/regression_model.py
+++ b/darts/models/forecasting/regression_model.py
@@ -114,10 +114,12 @@ def __init__(
             'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the forecast
+            horizon `n` used in `predict()`, which is the desired number of prediction points generated using a
+            one-shot- or auto-regressive forecast. Setting `n <= output_chunk_length` prevents auto-regression. This is
+            useful when the covariates don't extend far enough into the future, or to prohibit the model from using
+            future values of past and/or future covariates for prediction (depending on the model's covariate
+            support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
@@ -1503,10 +1505,12 @@ def __init__(
             `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
             of integers with lags is required.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
-            be useful to prevent auto-regression if the covariates don't extend far enough into the future or the
-            future values of the past covariates should not be accessed.
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the
+            forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot or an auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to
+            prohibit the model from using future values of past and/or future covariates for prediction
+            (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/models/forecasting/tcn_model.py b/darts/models/forecasting/tcn_model.py
index a9e0af317e..7f53b8781c 100644
--- a/darts/models/forecasting/tcn_model.py
+++ b/darts/models/forecasting/tcn_model.py
@@ -279,12 +279,16 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            Number of past time steps that are fed to the forecasting module.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot or an auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         kernel_size
             The size of every kernel in a convolutional layer.
         num_filters
diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py
index fc1c6a20aa..e6e503f974 100644
--- a/darts/models/forecasting/tft_model.py
+++ b/darts/models/forecasting/tft_model.py
@@ -683,9 +683,9 @@ def __init__(
         The internal sub models are adopted from `pytorch-forecasting's TemporalFusionTransformer
         <https://pytorch-forecasting.readthedocs.io/en/latest/models.html>`_ implementation.
 
-        This model supports mixed covariates (includes past covariates known for ``input_chunk_length``
-        points before prediction time and future covariates known for ``output_chunk_length`` points
-        after prediction time).
+        This model supports past covariates (known for `input_chunk_length` points before prediction time),
+        future covariates (known for `output_chunk_length` points after prediction time), static covariates,
+        as well as probabilistic forecasting.
 
         The TFT applies multi-head attention queries on future inputs from mandatory ``future_covariates``.
         Specifying future encoders with ``add_encoders`` (read below) can automatically generate future covariates
@@ -698,9 +698,18 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            Encoder length; number of past time steps that are fed to the forecasting module at prediction time.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
+            Also called: Encoder length
         output_chunk_length
-            Decoder length; number of future time steps that are fed to the forecasting module at prediction time.
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot or an auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
+            Also called: Decoder length
         hidden_size
             Hidden state size of the TFT. It is the main hyper-parameter and common across the internal TFT
             architecture.
diff --git a/darts/models/forecasting/tide_model.py b/darts/models/forecasting/tide_model.py
index 4dccbe530d..3005020268 100644
--- a/darts/models/forecasting/tide_model.py
+++ b/darts/models/forecasting/tide_model.py
@@ -383,14 +383,9 @@ def __init__(
         but attempts to provide better performance at lower computational cost by introducing
         multilayer perceptron (MLP)-based encoder-decoders without attention.
 
-        The model is implemented as a :class:`MixedCovariatesTorchModel`, which means that it supports
-        both past and future covariates, as well as static covariates. Probabilistic forecasting is supported through
-        the use of a ``likelihood`` instead of a ``loss_fn``.
-        The original paper does not describe how past covariates are treated in detail, so we assume that they are
-        passed to the encoder as-is.
-
-        The past covariates must be known for ``input_chunk_length`` points before prediction time whereas the future
-        covariates must be known for ``output_chunk_length`` points after prediction time.
+        This model supports past covariates (known for `input_chunk_length` points before prediction time),
+        future covariates (known for `output_chunk_length` points after prediction time), static covariates,
+        as well as probabilistic forecasting.
 
         The encoder and decoder are implemented as a series of residual blocks. The number of residual blocks in
         the encoder and decoder can be controlled via ``num_encoder_layers`` and ``num_decoder_layers`` respectively.
@@ -400,12 +395,16 @@ def __init__(
         Parameters
         ----------
         input_chunk_length
-            The length of the input sequence fed to the model.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot or an auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         num_encoder_layers
             The number of residual blocks in the encoder.
         num_decoder_layers
diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py
index a5b56e8b32..775c2e05ca 100644
--- a/darts/models/forecasting/transformer_model.py
+++ b/darts/models/forecasting/transformer_model.py
@@ -351,17 +351,21 @@ def __init__(
 
         The transformer architecture implemented here is based on [1]_.
 
-        This model supports past covariates (known for ``input_chunk_length`` points before prediction time).
+        This model supports past covariates (known for `input_chunk_length` points before prediction time).
 
         Parameters
         ----------
         input_chunk_length
-            Number of time steps to be input to the forecasting module.
+            Number of time steps in the past to take as a model input (per chunk). Applies to the target
+            series, and past and/or future covariates (if the model supports it).
         output_chunk_length
-            Number of time steps to be output by the internal forecasting module. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. Also, the number of future values
+            from future covariates to use as a model input (if the model supports future covariates). It is not the same
+            as the forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot or an auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to prohibit
+            the model from using future values of past and/or future covariates for prediction (depending on the
+            model's covariate support).
         d_model
             The number of expected features in the transformer encoder/decoder inputs (default=64).
         nhead
diff --git a/darts/models/forecasting/xgboost.py b/darts/models/forecasting/xgboost.py
index 0d0b532810..246e68c17a 100644
--- a/darts/models/forecasting/xgboost.py
+++ b/darts/models/forecasting/xgboost.py
@@ -97,10 +97,12 @@ def __init__(
             'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
             components are missing and the 'default_lags' key is not provided.
         output_chunk_length
-            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
-            horizon `n` used in `predict()`. However, setting `n <= output_chunk_length` prevents auto-regression. This
-            is useful when the covariates don't extend far enough into the future, or to prohibit the model from using
-            future values of past covariates for prediction (depending on the model's covariate support).
+            Number of time steps predicted at once (per chunk) by the internal model. It is not the same as the
+            forecast horizon `n` used in `predict()`, which is the desired number of prediction points generated
+            using either a one-shot or an auto-regressive forecast. Setting `n <= output_chunk_length` prevents
+            auto-regression. This is useful when the covariates don't extend far enough into the future, or to
+            prohibit the model from using future values of past and/or future covariates for prediction
+            (depending on the model's covariate support).
         add_encoders
             A large number of past and future covariates can be automatically generated with `add_encoders`.
             This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
diff --git a/darts/tests/models/forecasting/test_backtesting.py b/darts/tests/models/forecasting/test_backtesting.py
index e54ca70d5d..ffea1b2ba5 100644
--- a/darts/tests/models/forecasting/test_backtesting.py
+++ b/darts/tests/models/forecasting/test_backtesting.py
@@ -654,13 +654,14 @@ def test_gridsearch_multi(self):
 
     @pytest.mark.parametrize(
         "model_cls,parameters",
-        zip([Theta, ARIMA], [{"theta": [3, 4]}, {"p": [18, 4]}]),
+        zip([NaiveSeasonal, ARIMA], [{"K": [1, 2]}, {"p": [18, 4]}]),
     )
     def test_gridsearch_bad_covariates(self, model_cls, parameters):
         """Passing unsupported covariate should raise an exception"""
         dummy_series = get_dummy_series(
             ts_length=100, lt_end_value=1, st_value_offset=0
         ).astype(np.float32)
+
         ts_train, ts_val = dummy_series.split_before(split_point=0.8)
 
         bt_kwargs = {"start": -1, "start_format": "position", "show_warnings": False}