Clean TODOs (#1522)

* skipping * fix mypy * Apply suggestions from code review * reduce timeout * remove 11.1 rerun pytest * remove 11.1 rerun pytest in tests * version * try fix * another try * maxfail * retry * try again * less than 11.1 * try removing problematic * increase port * revert changes * add to more metrics * stupid mistake * missing type * fix todos --------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Lightning-AI · Feb 22, 2023 · 283d55c · 283d55c
1 parent e0508c8
commit 283d55c
Show file tree

Hide file tree

Showing 17 changed files with 121 additions and 87 deletions.
diff --git a/src/torchmetrics/metric.py b/src/torchmetrics/metric.py
@@ -220,7 +220,7 @@ def add_state(
  elif dist_reduce_fx == "cat":
  dist_reduce_fx = dim_zero_cat
  elif dist_reduce_fx is not None and not callable(dist_reduce_fx):
- raise ValueError("`dist_reduce_fx` must be callable or one of ['mean', 'sum', 'cat', None]")
+ raise ValueError("`dist_reduce_fx` must be callable or one of ['mean', 'sum', 'cat', 'min', 'max', None]")
 
  if isinstance(default, Tensor):
  default = default.contiguous()

diff --git a/src/torchmetrics/regression/tweedie_deviance.py b/src/torchmetrics/regression/tweedie_deviance.py
@@ -70,7 +70,7 @@ class TweedieDevianceScore(Metric):
  tensor(1.2083)
  """
  is_differentiable: bool = True
- higher_is_better = None # TODO: both -1 and 1 are optimal
+ higher_is_better = None
  full_state_update: bool = False
  sum_deviance_score: Tensor
  num_observations: Tensor

diff --git a/tests/unittests/bases/test_collections.py b/tests/unittests/bases/test_collections.py
@@ -105,11 +105,11 @@ def test_metric_collection_wrong_input(tmpdir):
  dms = DummyMetricSum()
 
  # Not all input are metrics (list)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Input .* to `MetricCollection` is not a instance of .*"):
  _ = MetricCollection([dms, 5])
 
  # Not all input are metrics (dict)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Value .* belonging to key .* is not an instance of .*"):
  _ = MetricCollection({"metric1": dms, "metric2": 5})
 
  # Same metric passed in multiple times

diff --git a/tests/unittests/bases/test_metric.py b/tests/unittests/bases/test_metric.py
@@ -69,16 +69,16 @@ def test_add_state():
  a.add_state("c", tensor(0), "cat")
  assert a._reductions["c"]([tensor([1]), tensor([1])]).shape == (2,)
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="`dist_reduce_fx` must be callable or one of .*"):
  a.add_state("d1", tensor(0), "xyz")
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="`dist_reduce_fx` must be callable or one of .*"):
  a.add_state("d2", tensor(0), 42)
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="state variable must be a tensor or any empty list .*"):
  a.add_state("d3", [tensor(0)], "sum")
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="state variable must be a tensor or any empty list .*"):
  a.add_state("d4", 42, "sum")
 
  def custom_fx(_):

diff --git a/tests/unittests/image/test_d_lambda.py b/tests/unittests/image/test_d_lambda.py
@@ -124,22 +124,24 @@ def test_d_lambda_half_gpu(self, preds, target, p):
 
 
 @pytest.mark.parametrize(
- ("preds", "target", "p"),
+ ("preds", "target", "p", "match"),
  [
- ([1, 16, 16], [1, 16, 16], 1), # len(shape)
- ([1, 1, 16, 16], [1, 1, 16, 16], 0), # invalid p
- ([1, 1, 16, 16], [1, 1, 16, 16], -1), # invalid p
+ ([1, 16, 16], [1, 16, 16], 1, "Expected `preds` and `target` to have BxCxHxW shape.*"), # len(shape)
+ ([1, 1, 16, 16], [1, 1, 16, 16], 0, "Expected `p` to be a positive integer. Got p: 0."), # invalid p
+ ([1, 1, 16, 16], [1, 1, 16, 16], -1, "Expected `p` to be a positive integer. Got p: -1."), # invalid p
  ],
 )
-def test_d_lambda_invalid_inputs(preds, target, p):
+def test_d_lambda_invalid_inputs(preds, target, p, match):
+ """Test that invalid input raises the correct errors."""
  preds_t = torch.rand(preds)
  target_t = torch.rand(target)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match=match):
  spectral_distortion_index(preds_t, target_t, p)
 
 
 def test_d_lambda_invalid_type():
+ """Test that error is raised on different dtypes."""
  preds_t = torch.rand((1, 1, 16, 16))
  target_t = torch.rand((1, 1, 16, 16), dtype=torch.float64)
- with pytest.raises(TypeError):
+ with pytest.raises(TypeError, match="Expected `ms` and `fused` to have the same data type.*"):
  spectral_distortion_index(preds_t, target_t, p=1)
diff --git a/tests/unittests/image/test_ergas.py b/tests/unittests/image/test_ergas.py
@@ -114,18 +114,20 @@ def test_ergas_half_gpu(self, reduction, preds, target, ratio):
 
 
 def test_error_on_different_shape(metric_class=ErrorRelativeGlobalDimensionlessSynthesis):
+ """Check that error is raised when input have different shape."""
  metric = metric_class()
- with pytest.raises(RuntimeError): # todo
+ with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape.*"):
  metric(torch.randn([1, 3, 16, 16]), torch.randn([1, 1, 16, 16]))
 
 
 def test_error_on_invalid_shape(metric_class=ErrorRelativeGlobalDimensionlessSynthesis):
+ """Check that error is raised when input is not 4D"""
  metric = metric_class()
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Expected `preds` and `target` to have BxCxHxW shape.*"):
  metric(torch.randn([3, 16, 16]), torch.randn([3, 16, 16]))
 
 
 def test_error_on_invalid_type(metric_class=ErrorRelativeGlobalDimensionlessSynthesis):
  metric = metric_class()
- with pytest.raises(TypeError): # todo
+ with pytest.raises(TypeError, match="Expected `preds` and `target` to have the same data type.*"):
  metric(torch.randn([3, 16, 16]), torch.randn([3, 16, 16], dtype=torch.float64))
diff --git a/tests/unittests/image/test_kid.py b/tests/unittests/image/test_kid.py
@@ -77,12 +77,10 @@ def test_kid_raises_errors_and_warnings():
  with pytest.raises(TypeError, match="Got unknown input to argument `feature`"):
  KernelInceptionDistance(feature=[1, 2])
 
- with pytest.raises( # noqa: PT012 # todo
- ValueError, match="Argument `subset_size` should be smaller than the number of samples"
- ):
- m = KernelInceptionDistance()
- m.update(torch.randint(0, 255, (5, 3, 299, 299), dtype=torch.uint8), real=True)
- m.update(torch.randint(0, 255, (5, 3, 299, 299), dtype=torch.uint8), real=False)
+ m = KernelInceptionDistance()
+ m.update(torch.randint(0, 255, (5, 3, 299, 299), dtype=torch.uint8), real=True)
+ m.update(torch.randint(0, 255, (5, 3, 299, 299), dtype=torch.uint8), real=False)
+ with pytest.raises(ValueError, match="Argument `subset_size` should be smaller than the number of samples"):
  m.compute()
 
 

diff --git a/tests/unittests/image/test_lpips.py b/tests/unittests/image/test_lpips.py
@@ -45,7 +45,7 @@ def _compare_fn(img1: Tensor, img2: Tensor, net_type: str, normalize: bool, redu
 
 @pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
 class TestLPIPS(MetricTester):
- atol: float = 1e-4 # TODO lowered to pass on RTX3090 and PT 1.13
+ atol: float = 1e-4
 
  @pytest.mark.parametrize("net_type", ["vgg", "alex", "squeeze"])
  @pytest.mark.parametrize("normalize", [False, True])

diff --git a/tests/unittests/image/test_psnr.py b/tests/unittests/image/test_psnr.py
@@ -148,8 +148,9 @@ def test_reduction_for_dim_none(reduction):
 
 
 def test_missing_data_range():
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ """Check that error is raised if data range is not provided."""
+ with pytest.raises(ValueError, match="The `data_range` must be given when `dim` is not None."):
  PeakSignalNoiseRatio(data_range=None, dim=0)
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="The `data_range` must be given when `dim` is not None."):
  peak_signal_noise_ratio(_inputs[0].preds, _inputs[0].target, data_range=None, dim=0)
diff --git a/tests/unittests/image/test_sam.py b/tests/unittests/image/test_sam.py
@@ -103,24 +103,28 @@ def test_sam_half_gpu(self, reduction, preds, target):
 
 
 def test_error_on_different_shape(metric_class=SpectralAngleMapper):
+ """Test that error is raised if preds and target have different shape."""
  metric = metric_class()
- with pytest.raises(RuntimeError):
+ with pytest.raises(RuntimeError, match="Predictions and targets are expected to have the same shape.*"):
  metric(torch.randn([1, 3, 16, 16]), torch.randn([1, 1, 16, 16]))
 
 
 def test_error_on_invalid_shape(metric_class=SpectralAngleMapper):
+ """Test that error is raised if input is not 4D."""
  metric = metric_class()
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Expected `preds` and `target` to have BxCxHxW shape.*"):
  metric(torch.randn([3, 16, 16]), torch.randn([3, 16, 16]))
 
 
 def test_error_on_invalid_type(metric_class=SpectralAngleMapper):
+ """Test that error is raised if preds and target have different dtype."""
  metric = metric_class()
- with pytest.raises(TypeError):
+ with pytest.raises(TypeError, match="Expected `preds` and `target` to have the same data type.*"):
  metric(torch.randn([3, 16, 16]), torch.randn([3, 16, 16], dtype=torch.float64))
 
 
 def test_error_on_grayscale_image(metric_class=SpectralAngleMapper):
+ """Test that error is raised if number of channelse is not larger than 1."""
  metric = metric_class()
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Expected channel dimension of `preds` and `target` to be larger than 1.*"):
  metric(torch.randn([16, 1, 16, 16]), torch.randn([16, 1, 16, 16]))
diff --git a/tests/unittests/image/test_ssim.py b/tests/unittests/image/test_ssim.py
@@ -211,28 +211,52 @@ def test_ssim_half_gpu(self, preds, target, sigma):
 
 
 @pytest.mark.parametrize(
- ("pred", "target", "kernel", "sigma"),
+ ("pred", "target", "kernel", "sigma", "match"),
  [
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5]), # len(kernel), len(sigma)
- ([1, 16, 16], [1, 16, 16], [11, 11], [1.5, 1.5]), # len(shape)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5, 1.5]), # len(kernel), len(sigma)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5]), # len(kernel), len(sigma)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, 1.5]), # invalid kernel input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 10], [1.5, 1.5]), # invalid kernel input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, -11], [1.5, 1.5]), # invalid kernel input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, 0]), # invalid sigma input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, -1.5]), # invalid sigma input
+ (
+ [1, 1, 16, 16],
+ [1, 1, 16, 16],
+ [11, 11],
+ [1.5],
+ "`kernel_size` has dimension 2, but expected to be two less that target dimensionality.*",
+ ),
+ (
+ [1, 16, 16],
+ [1, 16, 16],
+ [11, 11],
+ [1.5, 1.5],
+ "Expected `preds` and `target` to have BxCxHxW or BxCxDxHxW shape.*",
+ ),
+ (
+ [1, 1, 16, 16],
+ [1, 1, 16, 16],
+ [11],
+ [1.5, 1.5],
+ "`kernel_size` has dimension 1, but expected to be two less that target dimensionality.*",
+ ),
+ (
+ [1, 1, 16, 16],
+ [1, 1, 16, 16],
+ [11],
+ [1.5],
+ "`kernel_size` has dimension 1, but expected to be two less that target dimensionality.*",
+ ),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, 1.5], "Expected `kernel_size` to have odd positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 10], [1.5, 1.5], "Expected `kernel_size` to have odd positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, -11], [1.5, 1.5], "Expected `kernel_size` to have odd positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, 0], "Expected `sigma` to have positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, -1.5], "Expected `sigma` to have positive number.*"),
  ],
 )
-def test_ssim_invalid_inputs(pred, target, kernel, sigma):
+def test_ssim_invalid_inputs(pred, target, kernel, sigma, match):
  """Test for invalid input.
 
  Checks that that an value errors are raised if input sizes are different, kernel length and sigma does not match
  size or invalid values are provided.
  """
  pred = torch.rand(pred)
  target = torch.rand(target)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match=match):
  structural_similarity_index_measure(pred, target, kernel_size=kernel, sigma=sigma)
 
 

diff --git a/tests/unittests/image/test_uqi.py b/tests/unittests/image/test_uqi.py
@@ -110,31 +110,47 @@ def test_uqi_half_gpu(self, preds, target, multichannel, kernel_size):
 
 
 @pytest.mark.parametrize(
- ("pred", "target", "kernel", "sigma"),
+ ("pred", "target", "kernel", "sigma", "match"),
  [
- ([1, 16, 16], [1, 16, 16], [11, 11], [1.5, 1.5]), # len(shape)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5]), # len(kernel), len(sigma)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5, 1.5]), # len(kernel), len(sigma)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5]), # len(kernel), len(sigma)
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, 1.5]), # invalid kernel input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 10], [1.5, 1.5]), # invalid kernel input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, -11], [1.5, 1.5]), # invalid kernel input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, 0]), # invalid sigma input
- ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, -1.5]), # invalid sigma input
+ ([1, 16, 16], [1, 16, 16], [11, 11], [1.5, 1.5], "Expected `preds` and `target` to have BxCxHxW shape.*"),
+ (
+ [1, 1, 16, 16],
+ [1, 1, 16, 16],
+ [11, 11],
+ [1.5],
+ "Expected `kernel_size` and `sigma` to have the length of two.*",
+ ),
+ (
+ [1, 1, 16, 16],
+ [1, 1, 16, 16],
+ [11],
+ [1.5, 1.5],
+ "Expected `kernel_size` and `sigma` to have the length of two.*",
+ ),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11], [1.5], "Expected `kernel_size` and `sigma` to have the length of two.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, 1.5], "Expected `kernel_size` to have odd positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 10], [1.5, 1.5], "Expected `kernel_size` to have odd positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, -11], [1.5, 1.5], "Expected `kernel_size` to have odd positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 11], [1.5, 0], "Expected `sigma` to have positive number.*"),
+ ([1, 1, 16, 16], [1, 1, 16, 16], [11, 0], [1.5, -1.5], "Expected `kernel_size` to have odd positive number.*"),
  ],
 )
-def test_uqi_invalid_inputs(pred, target, kernel, sigma):
- pred_t = torch.rand(pred)
- target_t = torch.rand(target, dtype=torch.float64)
- with pytest.raises(TypeError):
- universal_image_quality_index(pred_t, target_t)
-
+def test_uqi_invalid_inputs(pred, target, kernel, sigma, match):
+ """Check that errors are raised on wrong input and parameter combinations."""
  pred = torch.rand(pred)
  target = torch.rand(target)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match=match):
  universal_image_quality_index(pred, target, kernel, sigma)
 
 
+def test_uqi_different_dtype():
+ """Check that an type error is raised if preds and target have different dtype."""
+ pred_t = torch.rand([1, 1, 16, 16])
+ target_t = torch.rand([1, 1, 16, 16], dtype=torch.float64)
+ with pytest.raises(TypeError, match="Expected `preds` and `target` to have the same data type.*"):
+ universal_image_quality_index(pred_t, target_t)
+
+
 def test_uqi_unequal_kernel_size():
  """Test the case where kernel_size[0] != kernel_size[1]."""
  preds = torch.tensor(

diff --git a/tests/unittests/multimodal/test_clip_score.py b/tests/unittests/multimodal/test_clip_score.py
@@ -100,12 +100,14 @@ def test_clip_score_differentiability(self, input, model_name_or_path):
  def test_error_on_not_same_amount_of_input(self, input, model_name_or_path):
  """Test that an error is raised if the number of images and text examples does not match."""
  metric = CLIPScore(model_name_or_path=model_name_or_path)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Expected the number of images and text examples to be the same.*"):
  metric(torch.randint(255, (2, 3, 224, 224)), "28-year-old chef found dead in San Francisco mall")
 
  @skip_on_connection_issues()
  def test_error_on_wrong_image_format(self, input, model_name_or_path):
  """Test that an error is raised if not all images are [c, h, w] format."""
  metric = CLIPScore(model_name_or_path=model_name_or_path)
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(
+ ValueError, match="Expected all images to be 3d but found image that has either more or less"
+ ):
  metric(torch.randint(255, (224, 224)), "28-year-old chef found dead in San Francisco mall")
diff --git a/tests/unittests/regression/test_kl_divergence.py b/tests/unittests/regression/test_kl_divergence.py
@@ -72,7 +72,6 @@ def test_kldivergence(self, reduction, p, q, log_prob, ddp):
  )
 
  def test_kldivergence_functional(self, reduction, p, q, log_prob):
- # todo: `num_outputs` is unused
  self.run_functional_metric_test(
  p,
  q,

diff --git a/tests/unittests/text/test_rouge.py b/tests/unittests/text/test_rouge.py
@@ -166,10 +166,10 @@ def test_rouge_metric_raises_errors_and_warnings():
 def test_rouge_metric_wrong_key_value_error():
  key = ("rouge1", "rouge")
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Got unknown rouge key rouge. Expected to be one of"):
  ROUGEScore(rouge_keys=key)
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Got unknown rouge key rouge. Expected to be one of"):
  rouge_score(
  _inputs_single_sentence_single_reference.preds,
  _inputs_single_sentence_single_reference.targets,

diff --git a/tests/unittests/utilities/test_utilities.py b/tests/unittests/utilities/test_utilities.py
@@ -29,13 +29,14 @@ def test_prints():
 
 
 def test_reduce():
+ """Test that reduction function works as expected and also raises error on wrong input."""
  start_tensor = torch.rand(50, 40, 30)
 
  assert torch.allclose(reduce(start_tensor, "elementwise_mean"), torch.mean(start_tensor))
  assert torch.allclose(reduce(start_tensor, "sum"), torch.sum(start_tensor))
  assert torch.allclose(reduce(start_tensor, "none"), start_tensor)
 
- with pytest.raises(ValueError): # noqa: PT011 # todo
+ with pytest.raises(ValueError, match="Reduction parameter unknown."):
  reduce(start_tensor, "error_reduction")