diff --git a/.github/workflows/mxnet_nightly.yml b/.github/workflows/mxnet_nightly.yml
index beec24bf6a..75f29f4f29 100644
--- a/.github/workflows/mxnet_nightly.yml
+++ b/.github/workflows/mxnet_nightly.yml
@@ -36,4 +36,4 @@ jobs:
           -r requirements/requirements-extras-autogluon.txt
     - name: Test with pytest
       run: |
-        pytest -m 'not (gpu or serial)' --cov src/gluonts --cov-report=term --cov-report xml test
+        pytest -m 'not (gpu or serial)' --cov=gluonts --cov-report=term --cov-report xml test
diff --git a/.github/workflows/test_release_unix_nightly.yml b/.github/workflows/test_release_unix_nightly.yml
index b602bc069a..f058fafcaa 100644
--- a/.github/workflows/test_release_unix_nightly.yml
+++ b/.github/workflows/test_release_unix_nightly.yml
@@ -36,4 +36,4 @@ jobs:
     - name: Test with pytest
       run: |
         cd gluon-ts
-        pytest -m 'not (gpu or serial)' --cov src/gluonts --cov-report=term --cov-report xml test
+        pytest -m 'not (gpu or serial)' --cov=gluonts --cov-report=term --cov-report xml test
diff --git a/.github/workflows/test_release_win32_nightly.yml b/.github/workflows/test_release_win32_nightly.yml
index 35a416182a..b9cda3fb92 100644
--- a/.github/workflows/test_release_win32_nightly.yml
+++ b/.github/workflows/test_release_win32_nightly.yml
@@ -37,4 +37,4 @@ jobs:
     - name: Test with pytest
       run: |
         cd gluon-ts
-        pytest -m 'not (gpu or serial)' --cov src/gluonts --cov-report=term --cov-report xml test
+        pytest -m 'not (gpu or serial)' --cov=gluonts --cov-report=term --cov-report xml test
diff --git a/pyproject.toml b/pyproject.toml
index 8a19a453e2..302a8657b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,7 @@ line-length = 79
 minversion = "6.0"
 timeout = 60
 addopts = """
+    --color=yes
     --doctest-modules
     --ignore src/gluonts/block.py
     --ignore src/gluonts/distribution.py
diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt
index 88205bcaf8..3aa4a861c1 100644
--- a/requirements/requirements-test.txt
+++ b/requirements/requirements-test.txt
@@ -1,9 +1,9 @@
-pandas>=1.1
-flaky~=3.6
-pytest-cov==2.6.*
-pytest-timeout~=1.3
-pytest-xdist~=1.27
-pytest>=6.0
+pandas >=1.1
+pytest >7.0
+pytest-cov >4.0,<5.0
+pytest-timeout >2.0, <3.0
+pytest-xdist >3.0, <4.0
+pytest-rerunfailures >=13.0, <14.0
 ujson
 orjson
 requests
diff --git a/test/ext/naive_2/test_predictors.py b/test/ext/naive_2/test_predictors.py
index 225a895f29..0272b9bd5e 100644
--- a/test/ext/naive_2/test_predictors.py
+++ b/test/ext/naive_2/test_predictors.py
@@ -17,7 +17,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-from flaky import flaky
 
 from gluonts.dataset.artificial import constant_dataset
 from gluonts.dataset.common import Dataset
@@ -106,7 +105,7 @@ def test_predictor(make_predictor, freq: str):
 CONSTANT_DATASET_PREDICTION_LENGTH = dataset_info.prediction_length
 
 
-@flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize(
     "predictor, accuracy",
     [
diff --git a/test/mx/distribution/test_distribution_sampling.py b/test/mx/distribution/test_distribution_sampling.py
index 80b61882ce..46b97c83a7 100644
--- a/test/mx/distribution/test_distribution_sampling.py
+++ b/test/mx/distribution/test_distribution_sampling.py
@@ -14,7 +14,6 @@
 import mxnet as mx
 import numpy as np
 import pytest
-from flaky import flaky
 
 from gluonts.core.serde import dump_json, load_json
 from gluonts.mx.model.tpp.distribution import Loglogistic, Weibull
@@ -149,7 +148,7 @@
 
 @pytest.mark.parametrize("distr_class, params", test_cases)
 @pytest.mark.parametrize("serialize_fn", serialize_fn_list)
-@flaky
+@pytest.mark.flaky(reruns=3)
 def test_sampling(distr_class, params, serialize_fn) -> None:
     distr = distr_class(**params)
     distr = serialize_fn(distr)
@@ -205,7 +204,7 @@ def test_sampling(distr_class, params, serialize_fn) -> None:
 ]
 
 
-@flaky(min_passes=1, max_runs=3)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("distr, params, dim", test_cases_multivariate)
 @pytest.mark.parametrize("serialize_fn", serialize_fn_list)
 def test_multivariate_sampling(distr, params, dim, serialize_fn) -> None:
@@ -261,7 +260,7 @@ def test_piecewise_linear_sampling(distr, params, serialize_fn):
     assert samples.shape == (num_samples, 2)
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("alpha, beta", [(0.3, 0.9), (1.5, 1.7)])
 @pytest.mark.parametrize("zero_probability, one_probability", [(0.1, 0.2)])
 def test_inflated_beta_sampling(
diff --git a/test/mx/distribution/test_mixture.py b/test/mx/distribution/test_mixture.py
index b868512e2f..7b02ac94c9 100644
--- a/test/mx/distribution/test_mixture.py
+++ b/test/mx/distribution/test_mixture.py
@@ -114,6 +114,8 @@ def diff(x: np.ndarray, y: np.ndarray) -> np.ndarray:
 def test_mixture(
     distr1: Distribution, distr2: Distribution, p: Tensor, serialize_fn
 ) -> None:
+    np.random.seed(0)
+    mx.random.seed(0)
     # sample from component distributions, and select samples
     samples1 = distr1.sample(num_samples=NUM_SAMPLES_LARGE)
     samples2 = distr2.sample(num_samples=NUM_SAMPLES_LARGE)
diff --git a/test/mx/distribution/test_mx_distribution_inference.py b/test/mx/distribution/test_mx_distribution_inference.py
index d347d0b35c..21ca98d86f 100644
--- a/test/mx/distribution/test_mx_distribution_inference.py
+++ b/test/mx/distribution/test_mx_distribution_inference.py
@@ -15,6 +15,7 @@
 Test that maximizing likelihood allows to correctly recover distribution
 parameters for all distributions exposed to the user.
 """
+import random
 from functools import reduce
 from typing import List, Tuple
 
@@ -536,7 +537,7 @@ def test_dirichlet_multinomial(hybridize: bool) -> None:
     ), f"Covariance did not match: cov = {cov}, cov_hat = {cov_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("hybridize", [True, False])
 @pytest.mark.parametrize("rank", [0, 1])
 def test_lowrank_multivariate_gaussian(hybridize: bool, rank: int) -> None:
@@ -604,7 +605,7 @@ def test_lowrank_multivariate_gaussian(hybridize: bool, rank: int) -> None:
     ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("hybridize", [True, False])
 def test_empirical_distribution(hybridize: bool) -> None:
     r"""
@@ -757,6 +758,9 @@ def test_neg_binomial(mu_alpha: Tuple[float, float], hybridize: bool) -> None:
     """
     Test to check that maximizing the likelihood recovers the parameters
     """
+    random.seed(0)
+    np.random.seed(0)
+    mx.random.seed(0)
     # test instance
     mu, alpha = mu_alpha
 
@@ -1243,7 +1247,7 @@ def test_genpareto_likelihood(xi: float, beta: float, hybridize: bool) -> None:
 
 
 @pytest.mark.timeout(120)
-@pytest.mark.flaky(max_runs=6, min_passes=1)
+@pytest.mark.flaky(reruns=6)
 @pytest.mark.parametrize("rate", [50.0])
 @pytest.mark.parametrize("zero_probability", [0.8, 0.2, 0.01])
 @pytest.mark.parametrize("hybridize", [False, True])
@@ -1255,8 +1259,11 @@ def test_inflated_poisson_likelihood(
     """
     Test to check that maximizing the likelihood recovers the parameters
     """
+    random.seed(0)
+    np.random.seed(0)
+    mx.random.seed(0)
     # generate samples
-    num_samples = 1000  # Required for convergence
+    num_samples = 2000  # Required for convergence
 
     distr = ZeroInflatedPoissonOutput().distribution(
         distr_args=[
@@ -1291,7 +1298,7 @@
 
 
 @pytest.mark.timeout(150)
-@pytest.mark.flaky(max_runs=6, min_passes=1)
+@pytest.mark.flaky(reruns=6)
 @pytest.mark.parametrize("mu", [5.0])
 @pytest.mark.parametrize("alpha", [0.05])
 @pytest.mark.parametrize("zero_probability", [0.3])
diff --git a/test/mx/model/gpvar/test_gpvar.py b/test/mx/model/gpvar/test_gpvar.py
index 26714bff7b..0eba39a1d4 100644
--- a/test/mx/model/gpvar/test_gpvar.py
+++ b/test/mx/model/gpvar/test_gpvar.py
@@ -14,7 +14,6 @@
 
 import mxnet as mx
 import pytest
-from flaky import flaky
 
 from gluonts.dataset.artificial import constant_dataset
 from gluonts.dataset.common import TrainDatasets
@@ -93,7 +92,7 @@ def test_gpvar_proj():
     assert distr.mean.shape == (batch, dim)
 
 
-@flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("hybridize", [True, False])
 @pytest.mark.parametrize("target_dim_sample", [None, 2])
 @pytest.mark.parametrize("use_marginal_transformation", [True, False])
diff --git a/test/mx/model/simple_feedforward/test_model.py b/test/mx/model/simple_feedforward/test_model.py
index ba51109991..a9df665546 100644
--- a/test/mx/model/simple_feedforward/test_model.py
+++ b/test/mx/model/simple_feedforward/test_model.py
@@ -31,7 +31,7 @@ def hyperparameters():
     )
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("hybridize", [True, False])
 @pytest.mark.parametrize("sampling", [True, False])
 def test_accuracy(accuracy_test, hyperparameters, hybridize, sampling):
diff --git a/test/mx/model/transformer/test_model.py b/test/mx/model/transformer/test_model.py
index 0c115a8bf3..0b96c74ffb 100644
--- a/test/mx/model/transformer/test_model.py
+++ b/test/mx/model/transformer/test_model.py
@@ -32,7 +32,7 @@ def hyperparameters():
     )
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("hybridize", [True, False])
 def test_accuracy(accuracy_test, hyperparameters, hybridize):
     hyperparameters.update(num_batches_per_epoch=80, hybridize=hybridize)
diff --git a/test/torch/model/test_estimators.py b/test/torch/model/test_estimators.py
index b2317549c4..3c2ef3ea51 100644
--- a/test/torch/model/test_estimators.py
+++ b/test/torch/model/test_estimators.py
@@ -19,6 +19,7 @@
 import pytest
 import pandas as pd
 import numpy as np
+from lightning import seed_everything
 
 from gluonts.dataset.repository import get_dataset
 from gluonts.model.predictor import Predictor
@@ -309,6 +310,7 @@ def test_estimator_constant_dataset(
     ],
 )
 def test_estimator_with_features(estimator_constructor):
+    seed_everything(42)
     freq = "1h"
     prediction_length = 12
 
diff --git a/test/torch/modules/test_torch_distribution_inference.py b/test/torch/modules/test_torch_distribution_inference.py
index 2a6b7a64e7..8e9e617a8b 100644
--- a/test/torch/modules/test_torch_distribution_inference.py
+++ b/test/torch/modules/test_torch_distribution_inference.py
@@ -19,6 +19,8 @@
 import numpy as np
 import pytest
 
+from lightning import seed_everything
+
 import torch
 import torch.nn as nn
 from scipy.special import softmax
@@ -119,7 +121,7 @@ def compare_logits(
     ).all(), f"logits did not match: logits_true = {param_true}, logits_hat = {param_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("concentration1, concentration0", [(3.75, 1.25)])
 def test_beta_likelihood(concentration1: float, concentration0: float) -> None:
     """
@@ -158,7 +160,7 @@ def test_beta_likelihood(concentration1: float, concentration0: float) -> None:
     ), f"concentration0 did not match: concentration0 = {concentration0}, concentration0_hat = {concentration0_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("concentration, rate", [(3.75, 1.25)])
 def test_gamma_likelihood(concentration: float, rate: float) -> None:
     """
@@ -193,7 +195,7 @@ def test_gamma_likelihood(concentration: float, rate: float) -> None:
     ), f"rate did not match: rate = {rate}, rate_hat = {rate_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("loc, scale,", [(1.0, 0.1)])
 def test_normal_likelihood(loc: float, scale: float):
     locs = torch.zeros((NUM_SAMPLES,)) + loc
@@ -223,7 +225,7 @@ def test_normal_likelihood(loc: float, scale: float):
     ), f"scale did not match: scale = {scale}, scale_hat = {scale_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("df, loc, scale,", [(6.0, 2.3, 0.7)])
 def test_studentT_likelihood(df: float, loc: float, scale: float):
     dfs = torch.zeros((NUM_SAMPLES,)) + df
@@ -258,7 +260,7 @@ def test_studentT_likelihood(df: float, loc: float, scale: float):
     ), f"scale did not match: scale = {scale}, scale_hat = {scale_hat}"
 
 
-@pytest.mark.flaky(max_runs=3, min_passes=1)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize("rate", [1.0])
 def test_poisson(rate: float) -> None:
     """
@@ -297,6 +299,7 @@ def test_neg_binomial(total_count: float, logit: float) -> None:
     """
     Test to check that maximizing the likelihood recovers the parameters
     """
+    seed_everything(42)
     # generate samples
     total_counts = torch.zeros((NUM_SAMPLES,)) + total_count
     logits = torch.zeros((NUM_SAMPLES,)) + logit
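
Note on the test-dependency migration above: the old decorator `@flaky(max_runs=3, min_passes=1)` came from the standalone `flaky` package, which this diff drops in favor of `pytest-rerunfailures`, whose `flaky` marker takes a `reruns` keyword. On the PyTorch side, `lightning.seed_everything(42)` seeds Python's `random`, NumPy, and torch in one call. Below is a minimal self-contained sketch of the resulting pattern, deterministic seeding plus a rerun safety net; the test name, body, and tolerance are illustrative assumptions, not code from the GluonTS suite:

    import random

    import numpy as np
    import pytest


    # pytest-rerunfailures: rerun this test up to 3 more times if it fails,
    # covering the intent of the old @flaky(max_runs=3, min_passes=1).
    @pytest.mark.flaky(reruns=3)
    def test_sample_mean_recovers_loc():
        # Seeding makes the draw deterministic, so the rerun marker only
        # guards against residual nondeterminism in the environment.
        random.seed(0)
        np.random.seed(0)
        samples = np.random.normal(loc=1.0, scale=0.1, size=10_000)
        assert abs(samples.mean() - 1.0) < 1e-2

With seeded RNGs the reruns should rarely trigger; they mainly keep CI green when a statistical test remains sensitive to platform-level numeric noise.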