From aba3920228537451372d610edab827a2adaee2be Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 09:40:31 +0100 Subject: [PATCH 01/11] Fix W605 --- darts/dataprocessing/transformers/boxcox.py | 6 +-- darts/metrics/metrics.py | 2 +- darts/utils/likelihood_models.py | 46 ++++++++++----------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/darts/dataprocessing/transformers/boxcox.py b/darts/dataprocessing/transformers/boxcox.py index de4a852fb3..1798129c2a 100644 --- a/darts/dataprocessing/transformers/boxcox.py +++ b/darts/dataprocessing/transformers/boxcox.py @@ -44,11 +44,11 @@ def __init__( name A specific name for the transformer lmbda - The parameter :math:`\lambda` of the Box-Cox transform. If a single float is given, the same - :math:`\lambda` value will be used for all dimensions of the series, for all the series. + The parameter :math:`\\lambda` of the Box-Cox transform. If a single float is given, the same + :math:`\\lambda` value will be used for all dimensions of the series, for all the series. If a sequence is given, there is one value per component in the series. If a sequence of sequence is given, there is one value per component for all series. - If `None` given, will automatically find an optimal value of :math:`\lambda` (for each dimension + If `None` given, will automatically find an optimal value of :math:`\\lambda` (for each dimension of the time series, for each time series) using :func:`scipy.stats.boxcox_normmax` with ``method=optim_method``. optim_method diff --git a/darts/metrics/metrics.py b/darts/metrics/metrics.py index f93cbcb8c6..3088349c5c 100644 --- a/darts/metrics/metrics.py +++ b/darts/metrics/metrics.py @@ -1146,7 +1146,7 @@ def rho_risk( where :math:`L_{\\rho} \\left( Z, \\hat{Z}_{\\rho} \\right)` is the :math:`\\rho`-loss function: .. math:: L_{\\rho} \\left( Z, \\hat{Z}_{\\rho} \\right) = 2 \\left( Z - \\hat{Z}_{\\rho} \\right) - \\left( \\rho I_{\\hat{Z}_{\\rho} < Z} - \\left( 1 - \\rho \\right) I_{\\hat{Z}_{\\rho} \geq Z} \\right), + \\left( \\rho I_{\\hat{Z}_{\\rho} < Z} - \\left( 1 - \\rho \\right) I_{\\hat{Z}_{\\rho} \\geq Z} \\right), where :math:`Z = \\sum_{t=1}^{T} y_t` (1) is the aggregated target value and :math:`\\hat{Z}_{\\rho}` is the :math:`\\rho`-quantile of the predicted values. For this, each sample realization :math:`i \\in N` is first diff --git a/darts/utils/likelihood_models.py b/darts/utils/likelihood_models.py index ecd00ad63d..b9264211cf 100644 --- a/darts/utils/likelihood_models.py +++ b/darts/utils/likelihood_models.py @@ -185,8 +185,8 @@ def __init__(self, prior_mu=None, prior_sigma=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Normal_distribution - Univariate continuous distribution. - - Support: :math:`\mathbb{R}`. - - Parameters: mean :math:`\\mu \in \mathbb{R}`, standard deviation :math:`\\sigma > 0`. + - Support: :math:`\\mathbb{R}`. + - Parameters: mean :math:`\\mu \\in \\mathbb{R}`, standard deviation :math:`\\sigma > 0`. Parameters ---------- @@ -243,7 +243,7 @@ def __init__(self, prior_lambda=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Poisson_distribution - Univariate discrete distribution - - Support: :math:`\mathbb{N}_0` (natural numbers including 0). + - Support: :math:`\\mathbb{N}_0` (natural numbers including 0). - Parameter: rate :math:`\\lambda > 0`. Parameters @@ -295,8 +295,8 @@ def __init__(self): It does not support priors. - Univariate discrete distribution. - - Support: :math:`\mathbb{N}_0` (natural numbers including 0). - - Parameters: number of failures :math:`r > 0`, success probability :math:`p \in (0, 1)`. + - Support: :math:`\\mathbb{N}_0` (natural numbers including 0). + - Parameters: number of failures :math:`r > 0`, success probability :math:`p \\in (0, 1)`. Behind the scenes the distribution is reparameterized so that the actual outputs of the network are in terms of the mean :math:`\\mu` and shape :math:`\\alpha`. @@ -344,8 +344,8 @@ def __init__(self, prior_p=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Bernoulli_distribution - Univariate discrete distribution. - - Support: :math:`\{0, 1\}`. - - Parameter: probability :math:`p \in (0, 1)`. + - Support: :math:`\\{0, 1\\}`. + - Parameter: probability :math:`p \\in (0, 1)`. Parameters ---------- @@ -440,8 +440,8 @@ def __init__(self, prior_xzero=None, prior_gamma=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Cauchy_distribution - Univariate continuous distribution. - - Support: :math:`\mathbb{R}`. - - Parameters: location :math:`x_0 \in \mathbb{R}`, scale :math:`\gamma > 0`. + - Support: :math:`\\mathbb{R}`. + - Parameters: location :math:`x_0 \\in \\mathbb{R}`, scale :math:`\\gamma > 0`. Due to its fat tails, this distribution is typically harder to estimate, and your mileage may vary. Also be aware that it typically @@ -499,7 +499,7 @@ def __init__(self, prior_lambda=None, prior_strength=1.0): - Univariate continuous distribution. - Support: open interval :math:`(0, 1)`. - - Parameter: shape :math:`\\lambda \in (0,1)` + - Parameter: shape :math:`\\lambda \\in (0,1)` Parameters ---------- @@ -545,7 +545,7 @@ def __init__(self, prior_alphas=None, prior_strength=1.0): - Multivariate continuous distribution, modeling all components of a time series jointly. - Support: The :math:`K`-dimensional simplex for series of dimension :math:`K`, i.e., - :math:`x_1, ..., x_K \\text{ with } x_i \in (0,1),\\; \\sum_i^K{x_i}=1`. + :math:`x_1, ..., x_K \\text{ with } x_i \\in (0,1),\\; \\sum_i^K{x_i}=1`. - Parameter: concentrations :math:`\\alpha_1, ..., \\alpha_K` with :math:`\\alpha_i > 0`. Parameters @@ -592,7 +592,7 @@ def __init__(self, prior_lambda=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Exponential_distribution - Univariate continuous distribution. - - Support: :math:`\mathbb{R}_{>0}`. + - Support: :math:`\\mathbb{R}_{>0}`. - Parameter: rate :math:`\\lambda > 0`. Parameters @@ -637,7 +637,7 @@ def __init__(self, prior_alpha=None, prior_beta=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Gamma_distribution - Univariate continuous distribution - - Support: :math:`\mathbb{R}_{>0}`. + - Support: :math:`\\mathbb{R}_{>0}`. - Parameters: shape :math:`\\alpha > 0` and rate :math:`\\beta > 0`. Parameters @@ -687,8 +687,8 @@ def __init__(self, prior_p=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Geometric_distribution - Univariate discrete distribution - - Support: :math:`\mathbb{N}_0` (natural numbers including 0). - - Parameter: success probability :math:`p \in (0, 1)`. + - Support: :math:`\\mathbb{N}_0` (natural numbers including 0). + - Parameter: success probability :math:`p \\in (0, 1)`. Parameters ---------- @@ -732,8 +732,8 @@ def __init__(self, prior_mu=None, prior_beta=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Gumbel_distribution - Univariate continuous distribution - - Support: :math:`\mathbb{R}`. - - Parameters: location :math:`\\mu \in \mathbb{R}` and scale :math:`\\beta > 0`. + - Support: :math:`\\mathbb{R}`. + - Parameters: location :math:`\\mu \\in \\mathbb{R}` and scale :math:`\\beta > 0`. Parameters ---------- @@ -781,7 +781,7 @@ def __init__(self, prior_sigma=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Half-normal_distribution - Univariate continuous distribution. - - Support: :math:`\mathbb{R}_{>0}`. + - Support: :math:`\\mathbb{R}_{>0}`. - Parameter: rate :math:`\\sigma > 0`. Parameters @@ -826,8 +826,8 @@ def __init__(self, prior_mu=None, prior_b=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Laplace_distribution - Univariate continuous distribution - - Support: :math:`\mathbb{R}`. - - Parameters: location :math:`\\mu \in \mathbb{R}` and scale :math:`b > 0`. + - Support: :math:`\\mathbb{R}`. + - Parameters: location :math:`\\mu \\in \\mathbb{R}` and scale :math:`b > 0`. Parameters ---------- @@ -875,8 +875,8 @@ def __init__(self, prior_mu=None, prior_sigma=None, prior_strength=1.0): https://en.wikipedia.org/wiki/Log-normal_distribution - Univariate continuous distribution. - - Support: :math:`\mathbb{R}_{>0}`. - - Parameters: :math:`\\mu \in \mathbb{R}` and :math:`\\sigma > 0`. + - Support: :math:`\\mathbb{R}_{>0}`. + - Parameters: :math:`\\mu \\in \\mathbb{R}` and :math:`\\sigma > 0`. Parameters ---------- @@ -924,7 +924,7 @@ def __init__(self, prior_strength=1.0): https://en.wikipedia.org/wiki/Weibull_distribution - Univariate continuous distribution - - Support: :math:`\mathbb{R}_{>0}`. + - Support: :math:`\\mathbb{R}_{>0}`. - Parameters: scale :math:`\\lambda > 0` and concentration :math:`k > 0`. It does not support priors. From d494238702cb0478667f8233a6b6bd2be7d5c055 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 09:41:29 +0100 Subject: [PATCH 02/11] Fix W291 --- darts/timeseries.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/darts/timeseries.py b/darts/timeseries.py index 0fd0a4be14..1978e7f58d 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -2,7 +2,7 @@ Timeseries ---------- -``TimeSeries`` is the main class in `darts`. +``TimeSeries`` is the main class in `darts`. It represents a univariate or multivariate time series, deterministic or stochastic. The values are stored in an array of shape `(time, dimensions, samples)`, where @@ -186,7 +186,7 @@ def __init__(self, xa: xr.DataArray): self._freq = 1 self._freq_str = None - """ + """ Factory Methods =============== """ @@ -759,7 +759,7 @@ def duration(self) -> Union[pd.Timedelta, int]: """The duration of this time series (as a time delta or int).""" return self._time_index[-1] - self._time_index[0] - """ + """ Some asserts ============= """ From e50911b8f25a9b0c8b181712909a778c2149b674 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 09:43:51 +0100 Subject: [PATCH 03/11] Fix F401 --- darts/dataprocessing/dtw/cost_matrix.py | 2 +- darts/dataprocessing/dtw/window.py | 2 -- darts/datasets/dataset_loaders.py | 1 - darts/models/filtering/gaussian_process_filter.py | 1 - darts/tests/models/forecasting/test_regression_models.py | 1 - darts/timeseries.py | 2 +- darts/utils/statistics.py | 1 - 7 files changed, 2 insertions(+), 8 deletions(-) diff --git a/darts/dataprocessing/dtw/cost_matrix.py b/darts/dataprocessing/dtw/cost_matrix.py index b795b5bb47..7e0adf5b7d 100644 --- a/darts/dataprocessing/dtw/cost_matrix.py +++ b/darts/dataprocessing/dtw/cost_matrix.py @@ -1,4 +1,4 @@ -from typing import Tuple, Dict +from typing import Tuple import numpy as np from .window import Window, CRWindow diff --git a/darts/dataprocessing/dtw/window.py b/darts/dataprocessing/dtw/window.py index b4bdd07fd5..a6fd87e55a 100644 --- a/darts/dataprocessing/dtw/window.py +++ b/darts/dataprocessing/dtw/window.py @@ -1,5 +1,3 @@ -from typing import Iterable, Tuple -from dataclasses import dataclass import numpy as np from darts.logging import raise_if_not, raise_if from abc import ABC, abstractmethod diff --git a/darts/datasets/dataset_loaders.py b/darts/datasets/dataset_loaders.py index be0b08dad6..5cfcab4d67 100644 --- a/darts/datasets/dataset_loaders.py +++ b/darts/datasets/dataset_loaders.py @@ -5,7 +5,6 @@ from abc import ABC, abstractmethod import pandas as pd -import numpy as np import requests from darts import TimeSeries diff --git a/darts/models/filtering/gaussian_process_filter.py b/darts/models/filtering/gaussian_process_filter.py index df1d276239..eb99e2d465 100644 --- a/darts/models/filtering/gaussian_process_filter.py +++ b/darts/models/filtering/gaussian_process_filter.py @@ -10,7 +10,6 @@ from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import Kernel -from darts.utils.utils import raise_if_not from darts.models.filtering.filtering_model import FilteringModel diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 52ce5e1670..37c2bb8e80 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -21,7 +21,6 @@ LinearRegressionModel, LightGBMModel, ) - from darts.utils.data.sequential_dataset import MixedCovariatesSequentialDataset from darts.utils.data.inference_dataset import MixedCovariatesInferenceDataset from darts.models.forecasting.regression_model import ( _shift_matrices, diff --git a/darts/timeseries.py b/darts/timeseries.py index 1978e7f58d..0b686e9218 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -23,7 +23,7 @@ import numpy as np import xarray as xr import matplotlib.pyplot as plt -from typing import Tuple, Optional, Callable, Any, List, Union, TextIO, Sequence +from typing import Tuple, Optional, Callable, Any, List, Union, Sequence from inspect import signature from collections import defaultdict from pandas.tseries.frequencies import to_offset diff --git a/darts/utils/statistics.py b/darts/utils/statistics.py index e6e5a9d20e..36aa29fcd0 100644 --- a/darts/utils/statistics.py +++ b/darts/utils/statistics.py @@ -13,7 +13,6 @@ from statsmodels.tsa.seasonal import seasonal_decompose from statsmodels.tsa.stattools import acf, pacf, grangercausalitytests, adfuller, kpss -from warnings import warn from darts.logging import raise_log, get_logger, raise_if_not, raise_if from darts import TimeSeries from .missing_values import fill_missing_values From b015ee7196c05402209c3e78f013c2a995364d95 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 10:57:46 +0100 Subject: [PATCH 04/11] Fix F541 --- darts/models/forecasting/regression_model.py | 2 +- darts/timeseries.py | 2 +- darts/utils/data/horizon_based_dataset.py | 4 ++-- darts/utils/statistics.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 646e41cbfd..940d089c51 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -359,7 +359,7 @@ def _create_lagged_data( raise_if( X_y.shape[0] == 0, - f"Unable to build any training samples; target and covariate series overlap too little.", + "Unable to build any training samples; target and covariate series overlap too little.", ) X, y = np.split(X_y, [df_X.shape[1]], axis=1) diff --git a/darts/timeseries.py b/darts/timeseries.py index 0b686e9218..4e3d22a578 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -2594,7 +2594,7 @@ def _fill_missing_dates( raise_if( len(xa) <= 2, - f"Input time series must be of (length>=3) when fill_missing_dates=True and freq=None.", + "Input time series must be of (length>=3) when fill_missing_dates=True and freq=None.", logger, ) diff --git a/darts/utils/data/horizon_based_dataset.py b/darts/utils/data/horizon_based_dataset.py index 50bb19ad71..b6c97e7636 100644 --- a/darts/utils/data/horizon_based_dataset.py +++ b/darts/utils/data/horizon_based_dataset.py @@ -170,8 +170,8 @@ def __getitem__( raise_if_not( len(covariate) == len(past_target), - f"The dataset contains 'past' covariates whose time axis doesn't allow to obtain the " - f"input (or output) chunk relative to the target series.", + "The dataset contains 'past' covariates whose time axis doesn't allow to obtain the " + "input (or output) chunk relative to the target series.", ) return past_target, covariate, future_target diff --git a/darts/utils/statistics.py b/darts/utils/statistics.py index 36aa29fcd0..2a86a2c6d5 100644 --- a/darts/utils/statistics.py +++ b/darts/utils/statistics.py @@ -473,11 +473,11 @@ def granger_causality_tests( if not stationarity_tests(ts_cause): logger.warning( - f"ts_cause doesn't seem to be stationary. Please review granger causality validity in your problem context." + "ts_cause doesn't seem to be stationary. Please review granger causality validity in your problem context." ) if not stationarity_tests(ts_effect): logger.warning( - f"ts_effect doesn't seem to be stationary. Please review granger causality validity in your problem context." + "ts_effect doesn't seem to be stationary. Please review granger causality validity in your problem context." ) return grangercausalitytests( From 01990c49cacccac9419798a315cd9a1966e0a908 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 11:02:30 +0100 Subject: [PATCH 05/11] Fix F841 (partially) --- darts/dataprocessing/dtw/cost_matrix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/darts/dataprocessing/dtw/cost_matrix.py b/darts/dataprocessing/dtw/cost_matrix.py index 7e0adf5b7d..ad3f8bebb6 100644 --- a/darts/dataprocessing/dtw/cost_matrix.py +++ b/darts/dataprocessing/dtw/cost_matrix.py @@ -117,8 +117,8 @@ def to_dense(self) -> np.ndarray: # TODO express only in terms of numpy operations for i in range(1, self.n + 1): - start = self.window.column_ranges[i * 2 + 0] - 1 - end = self.window.column_ranges[i * 2 + 1] - 1 + start = ranges[i * 2 + 0] - 1 + end = ranges[i * 2 + 1] - 1 len = lengths[i] offset = self.offsets[i] From 932344b5cfb4f940cd41e19ce7fb584a4b61d048 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 11:03:10 +0100 Subject: [PATCH 06/11] Fix E713 --- darts/utils/data/encoders.py | 2 +- darts/utils/timeseries_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/darts/utils/data/encoders.py b/darts/utils/data/encoders.py index 2efacaa6b0..afc6b603fe 100644 --- a/darts/utils/data/encoders.py +++ b/darts/utils/data/encoders.py @@ -1032,7 +1032,7 @@ def _process_input_encoders(self, params: Dict) -> Tuple[List, List]: # check input for invalid encoder types invalid_encoders = [ - enc for enc in params if not enc in ENCODER_KEYS + TRANSFORMER_KEYS + enc for enc in params if enc not in ENCODER_KEYS + TRANSFORMER_KEYS ] raise_if( len(invalid_encoders) > 0, diff --git a/darts/utils/timeseries_generation.py b/darts/utils/timeseries_generation.py index b8a5fa3515..6a967149c5 100644 --- a/darts/utils/timeseries_generation.py +++ b/darts/utils/timeseries_generation.py @@ -632,7 +632,7 @@ def datetime_attribute_timeseries( "week_of_year": 52, } - if not attribute in ["week", "weekofyear", "week_of_year"]: + if attribute not in ["week", "weekofyear", "week_of_year"]: values = getattr(time_index, attribute) else: values = ( From 99cdb6d3c29f338987d095bc20055ce8bbf6118c Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 11:06:51 +0100 Subject: [PATCH 07/11] Fix E731 --- darts/models/forecasting/torch_forecasting_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py index 37a0e7bb86..3ce353c18c 100644 --- a/darts/models/forecasting/torch_forecasting_model.py +++ b/darts/models/forecasting/torch_forecasting_model.py @@ -470,7 +470,11 @@ def fit( past_covariates=past_covariates, future_covariates=future_covariates ) - wrap_fn = lambda ts: [ts] if isinstance(ts, TimeSeries) else ts + def wrap_fn( + ts: Union[TimeSeries, Sequence[TimeSeries]] + ) -> Sequence[TimeSeries]: + return [ts] if isinstance(ts, TimeSeries) else ts + series = wrap_fn(series) past_covariates = wrap_fn(past_covariates) future_covariates = wrap_fn(future_covariates) From 0ad504de0727e50f01e3a9f67c01c7506322bab5 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 11:07:37 +0100 Subject: [PATCH 08/11] Fix E714 --- darts/models/forecasting/torch_forecasting_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py index 3ce353c18c..85120d02fd 100644 --- a/darts/models/forecasting/torch_forecasting_model.py +++ b/darts/models/forecasting/torch_forecasting_model.py @@ -1531,7 +1531,7 @@ def _get_batch_prediction( n_targets = past_target.shape[dim_component] n_past_covs = ( - past_covariates.shape[dim_component] if not past_covariates is None else 0 + past_covariates.shape[dim_component] if past_covariates is not None else 0 ) input_past = torch.cat( From 8676b396d4f24fedf79cae3e814a8e1b07d4a183 Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Tue, 18 Jan 2022 16:07:38 +0100 Subject: [PATCH 09/11] Fix F841 --- darts/dataprocessing/dtw/cost_matrix.py | 1 - darts/utils/likelihood_models.py | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/darts/dataprocessing/dtw/cost_matrix.py b/darts/dataprocessing/dtw/cost_matrix.py index ad3f8bebb6..27c7ebf74b 100644 --- a/darts/dataprocessing/dtw/cost_matrix.py +++ b/darts/dataprocessing/dtw/cost_matrix.py @@ -150,7 +150,6 @@ def __setitem__(self, elem, value): i, j = elem start = self.column_ranges[i * 2 + 0] - end = self.column_ranges[i * 2 + 1] self.dense[self.offsets[i] + j - start] = value diff --git a/darts/utils/likelihood_models.py b/darts/utils/likelihood_models.py index b9264211cf..b65c7cfa74 100644 --- a/darts/utils/likelihood_models.py +++ b/darts/utils/likelihood_models.py @@ -51,7 +51,6 @@ Laplace as _Laplace, Beta as _Beta, Exponential as _Exponential, - MultivariateNormal as _MultivariateNormal, Dirichlet as _Dirichlet, Geometric as _Geometric, Cauchy as _Cauchy, @@ -1108,12 +1107,11 @@ def _params_from_output(self, model_output: torch.Tensor) -> None: return None -if False: - """TODO - To make it work, we'll have to change our models so they optionally accept an absolute - number of parameters, instead of num_parameters per component. - """ +""" TODO +To make it work, we'll have to change our models so they optionally accept an absolute +number of parameters, instead of num_parameters per component. +from torch.distributions import MultivariateNormal as _MultivariateNormal class MultivariateNormal(Likelihood): def __init__( self, dim: int, prior_mu=None, prior_covmat=None, prior_strength=1.0 @@ -1172,3 +1170,4 @@ def _params_from_output(self, model_output: torch.Tensor): covmat[tril_indices[0], tril_indices[1]] = covmat_coefs covmat[tril_indices[1], tril_indices[0]] = covmat_coefs covmat[range(self.dim), range(self.dim)] = 1.0 +""" From a643ea94b97db5a8422df98a32bae375a2c1b19e Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Fri, 21 Jan 2022 10:00:57 +0100 Subject: [PATCH 10/11] Fix E501 --- .../transformers/base_data_transformer.py | 4 +- darts/metrics/metrics.py | 4 +- darts/models/filtering/kalman_filter.py | 18 +- darts/models/forecasting/block_rnn_model.py | 6 +- darts/models/forecasting/forecasting_model.py | 6 +- darts/models/forecasting/nbeats.py | 9 +- darts/models/forecasting/rnn_model.py | 6 +- darts/models/forecasting/tcn_model.py | 6 +- darts/models/forecasting/tft_model.py | 6 +- .../forecasting/torch_forecasting_model.py | 6 +- darts/models/forecasting/transformer_model.py | 10 +- darts/utils/data/encoders.py | 4 +- darts/utils/data/utils.py | 4 +- darts/utils/statistics.py | 8 +- darts/utils/timeseries_generation.py | 5 +- examples/00-quickstart.ipynb | 354 +++++++++--------- 16 files changed, 240 insertions(+), 216 deletions(-) diff --git a/darts/dataprocessing/transformers/base_data_transformer.py b/darts/dataprocessing/transformers/base_data_transformer.py index 95b6a73565..4b7448bdbc 100644 --- a/darts/dataprocessing/transformers/base_data_transformer.py +++ b/darts/dataprocessing/transformers/base_data_transformer.py @@ -109,8 +109,8 @@ def _transform_iterator( ) -> Iterator[Tuple[TimeSeries]]: """ Return an ``Iterator`` object with tuples of inputs for each single call to :func:`ts_transform()`. - Additional `args` and `kwargs` from :func:`transform()` (constant across all the calls to :func:`ts_transform()`) - are already forwarded, and thus don't need to be included in this generator. + Additional `args` and `kwargs` from :func:`transform()` (constant across all the calls to + :func:`ts_transform()`) are already forwarded, and thus don't need to be included in this generator. The basic implementation of this method returns ``zip(series)``, i.e., a generator of single-valued tuples, each containing one ``TimeSeries`` object. diff --git a/darts/metrics/metrics.py b/darts/metrics/metrics.py index 3088349c5c..9af3e5488c 100644 --- a/darts/metrics/metrics.py +++ b/darts/metrics/metrics.py @@ -30,8 +30,8 @@ def multi_ts_support(func): """ This decorator further adapts the metrics that took as input two univariate/multivariate ``TimeSeries`` instances, - adding support for equally-sized sequences of ``TimeSeries`` instances. The decorator computes the pairwise metric for - ``TimeSeries`` with the same indices, and returns a float value that is computed as a function of all the + adding support for equally-sized sequences of ``TimeSeries`` instances. The decorator computes the pairwise metric + for ``TimeSeries`` with the same indices, and returns a float value that is computed as a function of all the pairwise metrics using a `inter_reduction` subroutine passed as argument to the metric function. If a 'Sequence[TimeSeries]' is passed as input, this decorator provides also parallelisation of the metric diff --git a/darts/models/filtering/kalman_filter.py b/darts/models/filtering/kalman_filter.py index 574e0357bc..9915df6edd 100644 --- a/darts/models/filtering/kalman_filter.py +++ b/darts/models/filtering/kalman_filter.py @@ -35,8 +35,8 @@ def __init__(self, dim_x: int = 1, kf: Optional[Kalman] = None): This implementation uses Kalman from the NFourSID package. More information can be found here: https://nfoursid.readthedocs.io/en/latest/source/kalman.html. - The dimensionality of the measurements z and optional control signal (covariates) u is automatically inferred upon - calling `filter()`. + The dimensionality of the measurements z and optional control signal (covariates) u is automatically inferred + upon calling `filter()`. Parameters ---------- @@ -47,7 +47,7 @@ def __init__(self, dim_x: int = 1, kf: Optional[Kalman] = None): If this is provided, the parameter dim_x is ignored. This instance will be copied for every call to `filter()`, so the state is not carried over from one time series to another across several calls to `filter()`. - The various dimensionalities of the filter must match those of the `TimeSeries` used when calling `filter()`. + The dimensionalities of the filter must match those of the `TimeSeries` used when calling `filter()`. """ # TODO: Add support for x_init. Needs reimplementation of NFourSID. @@ -85,8 +85,8 @@ def fit( The series of outputs (observations) used to infer the underlying state space model. This must be a deterministic series (containing one sample). covariates : Optional[TimeSeries] - An optional series of inputs (control signal) that will also be used to infer the underlying state space model. - This must be a deterministic series (containing one sample). + An optional series of inputs (control signal) that will also be used to infer the underlying state space + model. This must be a deterministic series (containing one sample). num_block_rows : Optional[int] The number of block rows to use in the block Hankel matrices used in the N4SID algorithm. See the documentation of nfoursid.nfoursid.NFourSID for more information. @@ -142,11 +142,11 @@ def filter( Parameters ---------- series : TimeSeries - The series of outputs (observations) used to infer the underlying outputs according to the specified Kalman process. - This must be a deterministic series (containing one sample). + The series of outputs (observations) used to infer the underlying outputs according to the specified Kalman + process. This must be a deterministic series (containing one sample). covariates : Optional[TimeSeries] - An optional series of inputs (control signal), necessary if the Kalman filter was initialized with covariates. - This must be a deterministic series (containing one sample). + An optional series of inputs (control signal), necessary if the Kalman filter was initialized with + covariates. This must be a deterministic series (containing one sample). num_samples : int, default: 1 The number of samples to generate from the inferred distribution of the output z. If this is set to 1, the output is a `TimeSeries` containing a single sample using the mean of the distribution. diff --git a/darts/models/forecasting/block_rnn_model.py b/darts/models/forecasting/block_rnn_model.py index 71521b5b4d..c87ba15c6a 100644 --- a/darts/models/forecasting/block_rnn_model.py +++ b/darts/models/forecasting/block_rnn_model.py @@ -226,9 +226,9 @@ def __init__( Default: ``torch.nn.MSELoss()``. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 0ba27f34dc..1294204cfc 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -332,8 +332,10 @@ def historical_forecasts( # TODO: do we need a check here? I'd rather leave these checks to the models/datasets. # if covariates: - # raise_if_not(series.end_time() <= covariates.end_time() and covariates.start_time() <= series.start_time(), - # 'The provided covariates must be at least as long as the target series.') + # raise_if_not( + # series.end_time() <= covariates.end_time() and covariates.start_time() <= series.start_time(), + # 'The provided covariates must be at least as long as the target series.' + # ) # only GlobalForecastingModels support historical forecastings without retraining the model base_class_name = self.__class__.__base__.__name__ diff --git a/darts/models/forecasting/nbeats.py b/darts/models/forecasting/nbeats.py index 158a531b2c..d3f0f29e73 100644 --- a/darts/models/forecasting/nbeats.py +++ b/darts/models/forecasting/nbeats.py @@ -100,7 +100,8 @@ def __init__( The number of parameters of the likelihood (or 1 if no likelihood is used) expansion_coefficient_dim The dimensionality of the waveform generator parameters, also known as expansion coefficients. - Used in the generic architecture and the trend module of the interpretable architecture, where it determines the degree of the polynomial basis. + Used in the generic architecture and the trend module of the interpretable architecture, where it determines + the degree of the polynomial basis. input_chunk_length The length of the input sequence fed to the model. target_length @@ -562,9 +563,9 @@ def __init__( Default: ``torch.nn.MSELoss()``. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. diff --git a/darts/models/forecasting/rnn_model.py b/darts/models/forecasting/rnn_model.py index ff0686a74d..826c177559 100644 --- a/darts/models/forecasting/rnn_model.py +++ b/darts/models/forecasting/rnn_model.py @@ -210,9 +210,9 @@ def __init__( Default: ``torch.nn.MSELoss()``. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. diff --git a/darts/models/forecasting/tcn_model.py b/darts/models/forecasting/tcn_model.py index a43275d8f3..c6f4268287 100644 --- a/darts/models/forecasting/tcn_model.py +++ b/darts/models/forecasting/tcn_model.py @@ -338,9 +338,9 @@ def __init__( Default: ``torch.nn.MSELoss()``. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index 251fccb7de..7614a4ecc1 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -675,9 +675,9 @@ def __init__( Optionally, some keyword arguments for the PyTorch optimizer. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py index 85120d02fd..e2093a51f3 100644 --- a/darts/models/forecasting/torch_forecasting_model.py +++ b/darts/models/forecasting/torch_forecasting_model.py @@ -136,9 +136,9 @@ def __init__( Default: ``torch.nn.MSELoss()``. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py index b723367dea..d3e1761c2e 100644 --- a/darts/models/forecasting/transformer_model.py +++ b/darts/models/forecasting/transformer_model.py @@ -305,9 +305,9 @@ def __init__( Default: ``torch.nn.MSELoss()``. model_name Name of the model. Used for creating checkpoints and saving tensorboard data. If not specified, - defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of the - name is formatted with the local date and time, while PID is the processed ID (preventing models spawned at - the same time by different processes to share the same model_name). E.g., + defaults to the following string ``"YYYY-mm-dd_HH:MM:SS_torch_model_run_PID"``, where the initial part of + the name is formatted with the local date and time, while PID is the processed ID (preventing models spawned + at the same time by different processes to share the same model_name). E.g., ``"2021-06-14_09:53:32_torch_model_run_44607"``. work_dir Path of the working directory, where to save checkpoints and Tensorboard summaries. @@ -331,8 +331,8 @@ def __init__( References ---------- - .. [1] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and - Illia Polosukhin, "Attention Is All You Need", 2017. In Advances in Neural Information Processing Systems, + .. [1] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, + and Illia Polosukhin, "Attention Is All You Need", 2017. In Advances in Neural Information Processing Systems, pages 6000-6010. https://arxiv.org/abs/1706.03762. Notes diff --git a/darts/utils/data/encoders.py b/darts/utils/data/encoders.py index afc6b603fe..9a7a9e300b 100644 --- a/darts/utils/data/encoders.py +++ b/darts/utils/data/encoders.py @@ -178,10 +178,10 @@ class CyclicTemporalEncoder(SingleEncoder): - """CyclicTemporalEncoder: Cyclic index encoding for `TimeSeries` that have a time index of type `pd.DatetimeIndex`.""" - def __init__(self, index_generator: CovariateIndexGenerator, attribute: str): """ + Cyclic index encoding for `TimeSeries` that have a time index of type `pd.DatetimeIndex`. + Parameters ---------- index_generator diff --git a/darts/utils/data/utils.py b/darts/utils/data/utils.py index b910ee01e5..7d5a3fc5be 100644 --- a/darts/utils/data/utils.py +++ b/darts/utils/data/utils.py @@ -19,8 +19,8 @@ class CovariateType(Enum): def _get_matching_index(ts_target: TimeSeries, ts_covariate: TimeSeries, idx: int): """ - Given two overlapping series `ts_target` and `ts_covariate` and an index point `idx` of `ts_target`, returns the matching - index point in `ts_covariate`, based on the ending times of the two series. + Given two overlapping series `ts_target` and `ts_covariate` and an index point `idx` of `ts_target`, returns the + matching index point in `ts_covariate`, based on the ending times of the two series. The indices are starting from the end of the series. This function is used to jointly slice target and covariate series in datasets. It supports both datetime and diff --git a/darts/utils/statistics.py b/darts/utils/statistics.py index 1289e86c75..26ec49a16f 100644 --- a/darts/utils/statistics.py +++ b/darts/utils/statistics.py @@ -334,8 +334,9 @@ def stationarity_test_kpss( 'c' : The data is stationary around a constant (default). 'ct' : The data is stationary around a trend. nlags - Indicates the number of lags to be used. If 'auto' (default), lags is calculated using the data-dependent method of Hobijn et al. (1998). - See also Andrews (1991), Newey & West (1994), and Schwert (1989). If set to 'legacy', uses int(12 * (n / 100)**(1 / 4)) , as outlined in Schwert (1989). + Indicates the number of lags to be used. If 'auto' (default), lags is calculated using the data-dependent method + of Hobijn et al. (1998). See also Andrews (1991), Newey & West (1994), and Schwert (1989). If set to 'legacy', + uses int(12 * (n / 100)**(1 / 4)) , as outlined in Schwert (1989). Returns ------- @@ -465,7 +466,8 @@ def granger_causality_tests( if not ts_cause.has_same_time_as(ts_effect): logger.warning( - "ts_cause and ts_effect time series have different time index. We will slice-intersect ts_cause with ts_effect." + "ts_cause and ts_effect time series have different time index. " + "We will slice-intersect ts_cause with ts_effect." ) ts_cause = ts_cause.slice_intersect(ts_effect) diff --git a/darts/utils/timeseries_generation.py b/darts/utils/timeseries_generation.py index f9397c5041..13acf8cf47 100644 --- a/darts/utils/timeseries_generation.py +++ b/darts/utils/timeseries_generation.py @@ -573,8 +573,9 @@ def datetime_attribute_timeseries( Either a `pd.DatetimeIndex` attribute which will serve as the basis of the new column(s), or a `TimeSeries` whose time axis will serve this purpose. attribute - An attribute of `pd.DatetimeIndex`, or `week` / `weekofyear` / `week_of_year` - e.g. "month", "weekday", "day", "hour", "minute", "second". - See all available attributes in https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DatetimeIndex.html#pandas.DatetimeIndex . + An attribute of `pd.DatetimeIndex`, or `week` / `weekofyear` / `week_of_year` - e.g. "month", "weekday", "day", + "hour", "minute", "second". See all available attributes in + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DatetimeIndex.html#pandas.DatetimeIndex. one_hot Boolean value indicating whether to add the specified attribute as a one hot encoding (results in more columns). diff --git a/examples/00-quickstart.ipynb b/examples/00-quickstart.ipynb index 97f0fdecb7..abb3ff0339 100644 --- a/examples/00-quickstart.ipynb +++ b/examples/00-quickstart.ipynb @@ -205,7 +205,9 @@ } ], "source": [ - "series_noise = TimeSeries.from_times_and_values(series.time_index, np.random.randn(len(series)))\n", + "series_noise = TimeSeries.from_times_and_values(\n", + " series.time_index, np.random.randn(len(series))\n", + ")\n", "(series / 2 + 20 * series_noise - 10).plot()" ] }, @@ -295,7 +297,7 @@ } ], "source": [ - "series.map(lambda ts, x: x/ts.days_in_month).plot()" + "series.map(lambda ts, x: x / ts.days_in_month).plot()" ] }, { @@ -324,7 +326,7 @@ } ], "source": [ - "(series / 20).add_datetime_attribute('month').plot()" + "(series / 20).add_datetime_attribute(\"month\").plot()" ] }, { @@ -353,7 +355,7 @@ } ], "source": [ - "(series / 200).add_holidays('US').plot()" + "(series / 200).add_holidays(\"US\").plot()" ] }, { @@ -420,8 +422,8 @@ "values[60:95] = np.nan\n", "series_ = TimeSeries.from_values(values)\n", "\n", - "(series_ - 10).plot(label='with missing values (shifted below)')\n", - "fill_missing_values(series_).plot(label='without missing values')" + "(series_ - 10).plot(label=\"with missing values (shifted below)\")\n", + "fill_missing_values(series_).plot(label=\"without missing values\")" ] }, { @@ -453,9 +455,9 @@ } ], "source": [ - "train, val = series.split_before(pd.Timestamp('19580101'))\n", - "train.plot(label='training')\n", - "val.plot(label='validation')" + "train, val = series.split_before(pd.Timestamp(\"19580101\"))\n", + "train.plot(label=\"training\")\n", + "val.plot(label=\"validation\")" ] }, { @@ -495,8 +497,8 @@ "naive_model.fit(train)\n", "naive_forecast = naive_model.predict(36)\n", "\n", - "series.plot(label='actual')\n", - "naive_forecast.plot(label='naive forecast (K=1)')" + "series.plot(label=\"actual\")\n", + "naive_forecast.plot(label=\"naive forecast (K=1)\")" ] }, { @@ -530,7 +532,7 @@ "source": [ "from darts.utils.statistics import plot_acf, check_seasonality\n", "\n", - "plot_acf(train, m=12, alpha=.05)" + "plot_acf(train, m=12, alpha=0.05)" ] }, { @@ -557,9 +559,9 @@ ], "source": [ "for m in range(2, 25):\n", - " is_seasonal, period = check_seasonality(train, m=m, alpha=.05)\n", + " is_seasonal, period = check_seasonality(train, m=m, alpha=0.05)\n", " if is_seasonal:\n", - " print('There is seasonality of order {}.'.format(period))" + " print(\"There is seasonality of order {}.\".format(period))" ] }, { @@ -593,8 +595,8 @@ "seasonal_model.fit(train)\n", "seasonal_forecast = seasonal_model.predict(36)\n", "\n", - "series.plot(label='actual')\n", - "seasonal_forecast.plot(label='naive forecast (K=12)')" + "series.plot(label=\"actual\")\n", + "seasonal_forecast.plot(label=\"naive forecast (K=12)\")" ] }, { @@ -632,8 +634,8 @@ "combined_forecast = drift_forecast + seasonal_forecast - train.last_value()\n", "\n", "series.plot()\n", - "combined_forecast.plot(label='combined')\n", - "drift_forecast.plot(label='drift')" + "combined_forecast.plot(label=\"combined\")\n", + "drift_forecast.plot(label=\"drift\")" ] }, { @@ -667,8 +669,11 @@ "source": [ "from darts.metrics import mape\n", "\n", - "print(\"Mean absolute percentage error for the combined naive drift + seasonal: {:.2f}%.\".format(\n", - " mape(series, combined_forecast)))" + "print(\n", + " \"Mean absolute percentage error for the combined naive drift + seasonal: {:.2f}%.\".format(\n", + " mape(series, combined_forecast)\n", + " )\n", + ")" ] }, { @@ -722,10 +727,12 @@ "source": [ "from darts.models import ExponentialSmoothing, Prophet, AutoARIMA, Theta\n", "\n", + "\n", "def eval_model(model):\n", " model.fit(train)\n", " forecast = model.predict(len(val))\n", - " print('model {} obtains MAPE: {:.2f}%'.format(model, mape(val, forecast)))\n", + " print(\"model {} obtains MAPE: {:.2f}%\".format(model, mape(val, forecast)))\n", + "\n", "\n", "eval_model(ExponentialSmoothing())\n", "eval_model(Prophet())\n", @@ -765,7 +772,7 @@ "# Search for the best theta parameter, by trying 50 different values\n", "thetas = 2 - np.linspace(-10, 10, 50)\n", "\n", - "best_mape = float('inf')\n", + "best_mape = float(\"inf\")\n", "best_theta = 0\n", "\n", "for theta in thetas:\n", @@ -773,7 +780,7 @@ " model.fit(train)\n", " pred_theta = model.predict(len(val))\n", " res = mape(val, pred_theta)\n", - " \n", + "\n", " if res < best_mape:\n", " best_mape = res\n", " best_theta = theta" @@ -799,7 +806,11 @@ "best_theta_model.fit(train)\n", "pred_best_theta = best_theta_model.predict(len(val))\n", "\n", - "print('The MAPE is: {:.2f}, with theta = {}.'.format(mape(val, pred_best_theta), best_theta))" + "print(\n", + " \"The MAPE is: {:.2f}, with theta = {}.\".format(\n", + " mape(val, pred_best_theta), best_theta\n", + " )\n", + ")" ] }, { @@ -821,9 +832,9 @@ } ], "source": [ - "train.plot(label='train')\n", - "val.plot(label='true')\n", - "pred_best_theta.plot(label='prediction')" + "train.plot(label=\"train\")\n", + "val.plot(label=\"true\")\n", + "pred_best_theta.plot(label=\"prediction\")" ] }, { @@ -885,14 +896,13 @@ } ], "source": [ - "historical_fcast_theta = best_theta_model.historical_forecasts(series, \n", - " start=0.6, \n", - " forecast_horizon=3, \n", - " verbose=True)\n", + "historical_fcast_theta = best_theta_model.historical_forecasts(\n", + " series, start=0.6, forecast_horizon=3, verbose=True\n", + ")\n", "\n", - "series.plot(label='data')\n", - "historical_fcast_theta.plot(label='backtest 3-months ahead forecast (Theta)')\n", - "print('MAPE = {:.2f}%'.format(mape(historical_fcast_theta, series)))" + "series.plot(label=\"data\")\n", + "historical_fcast_theta.plot(label=\"backtest 3-months ahead forecast (Theta)\")\n", + "print(\"MAPE = {:.2f}%\".format(mape(historical_fcast_theta, series)))" ] }, { @@ -941,15 +951,17 @@ "source": [ "best_theta_model = Theta(best_theta)\n", "\n", - "raw_errors = best_theta_model.backtest(series, \n", - " start=0.6,\n", - " forecast_horizon=3,\n", - " metric=mape,\n", - " reduction=None,\n", - " verbose=True)\n", + "raw_errors = best_theta_model.backtest(\n", + " series, start=0.6, forecast_horizon=3, metric=mape, reduction=None, verbose=True\n", + ")\n", "\n", "from darts.utils.statistics import plot_hist\n", - "plot_hist(raw_errors, bins=np.arange(0, max(raw_errors), 1), title='Individual backtest error scores (histogram)')" + "\n", + "plot_hist(\n", + " raw_errors,\n", + " bins=np.arange(0, max(raw_errors), 1),\n", + " title=\"Individual backtest error scores (histogram)\",\n", + ")" ] }, { @@ -987,12 +999,14 @@ } ], "source": [ - "average_error = best_theta_model.backtest(series, \n", - " start=0.6, \n", - " forecast_horizon=3,\n", - " metric=mape,\n", - " reduction=np.mean, # this is actually the default\n", - " verbose=True)\n", + "average_error = best_theta_model.backtest(\n", + " series,\n", + " start=0.6,\n", + " forecast_horizon=3,\n", + " metric=mape,\n", + " reduction=np.mean, # this is actually the default\n", + " verbose=True,\n", + ")\n", "\n", "print(\"Average error (MAPE) over all historical forecasts: %.2f\" % average_error)" ] @@ -1089,14 +1103,13 @@ ], "source": [ "model_es = ExponentialSmoothing()\n", - "historical_fcast_es = model_es.historical_forecasts(series, \n", - " start=0.6, \n", - " forecast_horizon=3, \n", - " verbose=True)\n", + "historical_fcast_es = model_es.historical_forecasts(\n", + " series, start=0.6, forecast_horizon=3, verbose=True\n", + ")\n", "\n", - "series.plot(label='data')\n", - "historical_fcast_es.plot(label='backtest 3-months ahead forecast (Exp. Smoothing)')\n", - "print('MAPE = {:.2f}%'.format(mape(historical_fcast_es, series)))" + "series.plot(label=\"data\")\n", + "historical_fcast_es.plot(label=\"backtest 3-months ahead forecast (Exp. Smoothing)\")\n", + "print(\"MAPE = {:.2f}%\".format(mape(historical_fcast_es, series)))" ] }, { @@ -1187,8 +1200,10 @@ "train_air, val_air = series_air[:-36], series_air[-36:]\n", "train_milk, val_milk = series_milk[:-36], series_milk[-36:]\n", "\n", - "train_air.plot(); val_air.plot()\n", - "train_milk.plot(); val_milk.plot()" + "train_air.plot()\n", + "val_air.plot()\n", + "train_milk.plot()\n", + "val_milk.plot()" ] }, { @@ -1290,9 +1305,7 @@ "source": [ "from darts.models import NBEATSModel\n", "\n", - "model = NBEATSModel(input_chunk_length=24,\n", - " output_chunk_length=12,\n", - " random_state=42)\n", + "model = NBEATSModel(input_chunk_length=24, output_chunk_length=12, random_state=42)\n", "\n", "model.fit([train_air_scaled, train_milk_scaled], epochs=50, verbose=True)" ] @@ -1329,11 +1342,11 @@ "# scale back:\n", "pred_air, pred_milk = scaler.inverse_transform([pred_air, pred_milk])\n", "\n", - "plt.figure(figsize=(10,6))\n", - "series_air.plot(label='actual (air)')\n", - "series_milk.plot(label='actual (milk)')\n", - "pred_air.plot(label='forecast (air)')\n", - "pred_milk.plot(label='forecast (milk)')" + "plt.figure(figsize=(10, 6))\n", + "series_air.plot(label=\"actual (air)\")\n", + "series_milk.plot(label=\"actual (milk)\")\n", + "pred_air.plot(label=\"forecast (air)\")\n", + "pred_milk.plot(label=\"forecast (milk)\")" ] }, { @@ -1390,16 +1403,26 @@ "from darts import concatenate\n", "from darts.utils.timeseries_generation import datetime_attribute_timeseries as dt_attr\n", "\n", - "air_covs = concatenate([dt_attr(series_air.time_index, 'month', dtype=np.float32) / 12,\n", - " (dt_attr(series_air.time_index, 'year', dtype=np.float32) - 1948) / 12],\n", - " axis='component')\n", - "\n", - "milk_covs = concatenate([dt_attr(series_milk.time_index, 'month', dtype=np.float32) / 12,\n", - " (dt_attr(series_milk.time_index, 'year', dtype=np.float32) - 1962) / 13],\n", - " axis='component')\n", + "air_covs = concatenate(\n", + " [\n", + " dt_attr(series_air.time_index, \"month\", dtype=np.float32) / 12,\n", + " (dt_attr(series_air.time_index, \"year\", dtype=np.float32) - 1948) / 12,\n", + " ],\n", + " axis=\"component\",\n", + ")\n", + "\n", + "milk_covs = concatenate(\n", + " [\n", + " dt_attr(series_milk.time_index, \"month\", dtype=np.float32) / 12,\n", + " (dt_attr(series_milk.time_index, \"year\", dtype=np.float32) - 1962) / 13,\n", + " ],\n", + " axis=\"component\",\n", + ")\n", "\n", "air_covs.plot()\n", - "plt.title('one multivariate time series of 2 dimensions, containing covariates for the air series:');" + "plt.title(\n", + " \"one multivariate time series of 2 dimensions, containing covariates for the air series:\"\n", + ");" ] }, { @@ -1449,14 +1472,14 @@ } ], "source": [ - "model = NBEATSModel(input_chunk_length=24,\n", - " output_chunk_length=12,\n", - " random_state=42)\n", + "model = NBEATSModel(input_chunk_length=24, output_chunk_length=12, random_state=42)\n", "\n", - "model.fit([train_air_scaled, train_milk_scaled],\n", - " past_covariates=[air_covs, milk_covs],\n", - " epochs=50,\n", - " verbose=True)" + "model.fit(\n", + " [train_air_scaled, train_milk_scaled],\n", + " past_covariates=[air_covs, milk_covs],\n", + " epochs=50,\n", + " verbose=True,\n", + ")" ] }, { @@ -1485,21 +1508,17 @@ } ], "source": [ - "pred_air = model.predict(series=train_air_scaled,\n", - " past_covariates=air_covs,\n", - " n=36)\n", - "pred_milk = model.predict(series=train_milk_scaled,\n", - " past_covariates=milk_covs,\n", - " n=36)\n", + "pred_air = model.predict(series=train_air_scaled, past_covariates=air_covs, n=36)\n", + "pred_milk = model.predict(series=train_milk_scaled, past_covariates=milk_covs, n=36)\n", "\n", "# scale back:\n", "pred_air, pred_milk = scaler.inverse_transform([pred_air, pred_milk])\n", "\n", - "plt.figure(figsize=(10,6))\n", - "series_air.plot(label='actual (air)')\n", - "series_milk.plot(label='actual (milk)')\n", - "pred_air.plot(label='forecast (air)')\n", - "pred_milk.plot(label='forecast (milk)')" + "plt.figure(figsize=(10, 6))\n", + "series_air.plot(label=\"actual (air)\")\n", + "series_milk.plot(label=\"actual (milk)\")\n", + "pred_air.plot(label=\"forecast (air)\")\n", + "pred_milk.plot(label=\"forecast (milk)\")" ] }, { @@ -1527,11 +1546,11 @@ "outputs": [], "source": [ "encoders = {\n", - " 'cyclic': {'future': ['month']},\n", - " 'datetime_attribute': {'future': ['hour', 'dayofweek']},\n", - " 'position': {'past': ['absolute'], 'future': ['relative']},\n", - " 'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},\n", - " 'transformer': Scaler()\n", + " \"cyclic\": {\"future\": [\"month\"]},\n", + " \"datetime_attribute\": {\"future\": [\"hour\", \"dayofweek\"]},\n", + " \"position\": {\"past\": [\"absolute\"], \"future\": [\"relative\"]},\n", + " \"custom\": {\"past\": [lambda idx: (idx.year - 1950) / 50]},\n", + " \"transformer\": Scaler(),\n", "}" ] }, @@ -1559,10 +1578,7 @@ "metadata": {}, "outputs": [], "source": [ - "encoders = {\n", - " 'datetime_attribute': {'past': ['month', 'year']},\n", - " 'transformer': Scaler()\n", - "}" + "encoders = {\"datetime_attribute\": {\"past\": [\"month\", \"year\"]}, \"transformer\": Scaler()}" ] }, { @@ -1612,14 +1628,14 @@ } ], "source": [ - "model = NBEATSModel(input_chunk_length=24,\n", - " output_chunk_length=12,\n", - " add_encoders=encoders,\n", - " random_state=42)\n", + "model = NBEATSModel(\n", + " input_chunk_length=24,\n", + " output_chunk_length=12,\n", + " add_encoders=encoders,\n", + " random_state=42,\n", + ")\n", "\n", - "model.fit([train_air_scaled, train_milk_scaled],\n", - " epochs=50,\n", - " verbose=True)" + "model.fit([train_air_scaled, train_milk_scaled], epochs=50, verbose=True)" ] }, { @@ -1648,15 +1664,14 @@ } ], "source": [ - "pred_air = model.predict(series=train_air_scaled,\n", - " n=36)\n", + "pred_air = model.predict(series=train_air_scaled, n=36)\n", "\n", "# scale back:\n", "pred_air = scaler.inverse_transform(pred_air)\n", "\n", - "plt.figure(figsize=(10,6))\n", - "series_air.plot(label='actual (air)')\n", - "pred_air.plot(label='forecast (air)')" + "plt.figure(figsize=(10, 6))\n", + "series_air.plot(label=\"actual (air)\")\n", + "pred_air.plot(label=\"forecast (air)\")" ] }, { @@ -1685,12 +1700,11 @@ "from darts.models import RegressionModel\n", "from sklearn.linear_model import BayesianRidge\n", "\n", - "model = RegressionModel(lags=72, \n", - " lags_future_covariates=[-6, 0],\n", - " model=BayesianRidge())\n", + "model = RegressionModel(lags=72, lags_future_covariates=[-6, 0], model=BayesianRidge())\n", "\n", - "model.fit([train_air_scaled, train_milk_scaled],\n", - " future_covariates=[air_covs, milk_covs])" + "model.fit(\n", + " [train_air_scaled, train_milk_scaled], future_covariates=[air_covs, milk_covs]\n", + ")" ] }, { @@ -1725,18 +1739,20 @@ } ], "source": [ - "pred_air, pred_milk = model.predict(series=[train_air_scaled, train_milk_scaled],\n", - " future_covariates=[air_covs, milk_covs],\n", - " n=36)\n", + "pred_air, pred_milk = model.predict(\n", + " series=[train_air_scaled, train_milk_scaled],\n", + " future_covariates=[air_covs, milk_covs],\n", + " n=36,\n", + ")\n", "\n", "# scale back:\n", "pred_air, pred_milk = scaler.inverse_transform([pred_air, pred_milk])\n", "\n", - "plt.figure(figsize=(10,6))\n", - "series_air.plot(label='actual (air)')\n", - "series_milk.plot(label='actual (milk)')\n", - "pred_air.plot(label='forecast (air)')\n", - "pred_milk.plot(label='forecast (milk)')" + "plt.figure(figsize=(10, 6))\n", + "series_air.plot(label=\"actual (air)\")\n", + "series_milk.plot(label=\"actual (milk)\")\n", + "pred_air.plot(label=\"forecast (air)\")\n", + "pred_milk.plot(label=\"forecast (milk)\")" ] }, { @@ -1790,8 +1806,7 @@ } ], "source": [ - "mape([series_air, series_milk], [pred_air, pred_milk], \n", - " inter_reduction=np.mean)" + "mape([series_air, series_milk], [pred_air, pred_milk], inter_reduction=np.mean)" ] }, { @@ -1843,17 +1858,15 @@ } ], "source": [ - "bayes_ridge_model = RegressionModel(lags=72, \n", - " lags_future_covariates=[0],\n", - " model=BayesianRidge())\n", + "bayes_ridge_model = RegressionModel(\n", + " lags=72, lags_future_covariates=[0], model=BayesianRidge()\n", + ")\n", "\n", - "backtest = bayes_ridge_model.historical_forecasts(series_air,\n", - " future_covariates=air_covs,\n", - " start=0.6,\n", - " forecast_horizon=3,\n", - " verbose=True)\n", + "backtest = bayes_ridge_model.historical_forecasts(\n", + " series_air, future_covariates=air_covs, start=0.6, forecast_horizon=3, verbose=True\n", + ")\n", "\n", - "print('MAPE = %.2f' % (mape(backtest, series_air)))\n", + "print(\"MAPE = %.2f\" % (mape(backtest, series_air)))\n", "series_air.plot()\n", "backtest.plot()" ] @@ -1899,8 +1912,8 @@ "model_es.fit(train)\n", "probabilistic_forecast = model_es.predict(len(val), num_samples=500)\n", "\n", - "series.plot(label='actual')\n", - "probabilistic_forecast.plot(label='probabilistic forecast')\n", + "series.plot(label=\"actual\")\n", + "probabilistic_forecast.plot(label=\"probabilistic forecast\")\n", "plt.legend()\n", "plt.show()" ] @@ -1962,10 +1975,12 @@ "from darts.models import TCNModel\n", "from darts.utils.likelihood_models import LaplaceLikelihood\n", "\n", - "model = TCNModel(input_chunk_length=24,\n", - " output_chunk_length=12,\n", - " random_state=42,\n", - " likelihood=LaplaceLikelihood())\n", + "model = TCNModel(\n", + " input_chunk_length=24,\n", + " output_chunk_length=12,\n", + " random_state=42,\n", + " likelihood=LaplaceLikelihood(),\n", + ")\n", "\n", "model.fit(train_air_scaled, epochs=400, verbose=True)" ] @@ -2057,10 +2072,12 @@ } ], "source": [ - "model = TCNModel(input_chunk_length=24,\n", - " output_chunk_length=12,\n", - " random_state=42,\n", - " likelihood=LaplaceLikelihood(prior_b=.1))\n", + "model = TCNModel(\n", + " input_chunk_length=24,\n", + " output_chunk_length=12,\n", + " random_state=42,\n", + " likelihood=LaplaceLikelihood(prior_b=0.1),\n", + ")\n", "\n", "model.fit(train_air_scaled, epochs=400, verbose=True)" ] @@ -2119,8 +2136,8 @@ } ], "source": [ - "pred.plot(low_quantile=0.01, high_quantile=0.99, label='1-99th percentiles')\n", - "pred.plot(low_quantile=0.2, high_quantile=0.8, label='20-80th percentiles')" + "pred.plot(low_quantile=0.01, high_quantile=0.99, label=\"1-99th percentiles\")\n", + "pred.plot(low_quantile=0.2, high_quantile=0.8, label=\"20-80th percentiles\")" ] }, { @@ -2157,10 +2174,10 @@ "source": [ "from darts.metrics import rho_risk\n", "\n", - "print('MAPE of median forecast: %.2f' % mape(series_air, pred))\n", + "print(\"MAPE of median forecast: %.2f\" % mape(series_air, pred))\n", "for rho in [0.05, 0.1, 0.5, 0.9, 0.95]:\n", " rr = rho_risk(series_air, pred, rho=rho)\n", - " print('rho-risk at quantile %.2f: %.2f' % (rho, rr))" + " print(\"rho-risk at quantile %.2f: %.2f\" % (rho, rr))" ] }, { @@ -2217,10 +2234,12 @@ "source": [ "from darts.utils.likelihood_models import QuantileRegression\n", "\n", - "model = TCNModel(input_chunk_length=24,\n", - " output_chunk_length=12,\n", - " random_state=42,\n", - " likelihood=QuantileRegression([0.05, 0.1, 0.5, 0.9, 0.95]))\n", + "model = TCNModel(\n", + " input_chunk_length=24,\n", + " output_chunk_length=12,\n", + " random_state=42,\n", + " likelihood=QuantileRegression([0.05, 0.1, 0.5, 0.9, 0.95]),\n", + ")\n", "\n", "model.fit(train_air_scaled, epochs=400, verbose=True)" ] @@ -2264,10 +2283,10 @@ "series_air.plot()\n", "pred.plot()\n", "\n", - "print('MAPE of median forecast: %.2f' % mape(series_air, pred))\n", + "print(\"MAPE of median forecast: %.2f\" % mape(series_air, pred))\n", "for rho in [0.05, 0.1, 0.5, 0.9, 0.95]:\n", " rr = rho_risk(series_air, pred, rho=rho)\n", - " print('rho-risk at quantile %.2f: %.2f' % (rho, rr))" + " print(\"rho-risk at quantile %.2f: %.2f\" % (rho, rr))" ] }, { @@ -2335,12 +2354,11 @@ "\n", "ensemble_model = NaiveEnsembleModel(models=models)\n", "\n", - "backtest = ensemble_model.historical_forecasts(series_air,\n", - " start=0.6,\n", - " forecast_horizon=3,\n", - " verbose=True)\n", + "backtest = ensemble_model.historical_forecasts(\n", + " series_air, start=0.6, forecast_horizon=3, verbose=True\n", + ")\n", "\n", - "print('MAPE = %.2f' % (mape(backtest, series_air)))\n", + "print(\"MAPE = %.2f\" % (mape(backtest, series_air)))\n", "series_air.plot()\n", "backtest.plot()" ] @@ -2407,15 +2425,15 @@ "\n", "models = [NaiveDrift(), NaiveSeasonal(12)]\n", "\n", - "ensemble_model = RegressionEnsembleModel(forecasting_models=models, \n", - " regression_train_n_points=12)\n", + "ensemble_model = RegressionEnsembleModel(\n", + " forecasting_models=models, regression_train_n_points=12\n", + ")\n", "\n", - "backtest = ensemble_model.historical_forecasts(series_air,\n", - " start=0.6,\n", - " forecast_horizon=3,\n", - " verbose=True)\n", + "backtest = ensemble_model.historical_forecasts(\n", + " series_air, start=0.6, forecast_horizon=3, verbose=True\n", + ")\n", "\n", - "print('MAPE = %.2f' % (mape(backtest, series_air)))\n", + "print(\"MAPE = %.2f\" % (mape(backtest, series_air)))\n", "series_air.plot()\n", "backtest.plot()" ] @@ -2531,7 +2549,7 @@ "\n", "kernel = RBF()\n", "\n", - "gpf = GaussianProcessFilter(kernel=kernel, alpha=.1, normalize_y=True)\n", + "gpf = GaussianProcessFilter(kernel=kernel, alpha=0.1, normalize_y=True)\n", "filtered_series = gpf.filter(series_holes, num_samples=100)\n", "\n", "filtered_series.plot()" From 872e3fc0c4271948643bd85c7cd1c41bdcde5bab Mon Sep 17 00:00:00 2001 From: Tomas Van Pottelbergh <tomas.vanpottelbergh@unit8.co> Date: Fri, 21 Jan 2022 10:11:18 +0100 Subject: [PATCH 11/11] Fix E501 --- darts/datasets/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/darts/datasets/__init__.py b/darts/datasets/__init__.py index 25f8c0e2de..c0a81829b9 100644 --- a/darts/datasets/__init__.py +++ b/darts/datasets/__init__.py @@ -207,7 +207,8 @@ class TaylorDataset(DatasetLoaderCSV): References ---------- - .. [1] Taylor, J.W. (2003) Short-term electricity demand forecasting using double seasonal exponential smoothing. Journal of the Operational Research Society, 54, 799-805. + .. [1] Taylor, J.W. (2003) Short-term electricity demand forecasting using double seasonal exponential smoothing. + Journal of the Operational Research Society, 54, 799-805. .. [2] https://www.rdocumentation.org/packages/forecast/versions/8.13/topics/taylor """