
Word Information Lost and Preserved - ASR metrics #630

Merged: 29 commits, Nov 24, 2021 (the diff below shows changes from 21 commits)

Commits
fbb6648 - Word Information Lost and Preserved (mahinlma, Nov 20, 2021)
fdde1e2 - [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Nov 20, 2021)
78e3df3 - Update CHANGELOG.md (mahinlma, Nov 20, 2021)
a0bb9a3 - Merge branch 'master' of https://github.com/mahinlma/metrics (mahinlma, Nov 20, 2021)
9758719 - issue fix (mahinlma, Nov 20, 2021)
b05ab0e - [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Nov 20, 2021)
020e2f0 - Update test_wip.py (mahinlma, Nov 20, 2021)
b40da0a - Merge branch 'master' of https://github.com/mahinlma/metrics (mahinlma, Nov 20, 2021)
abcdecc - Update torchmetrics/text/wil.py (mahinlma, Nov 23, 2021)
b02e826 - Update torchmetrics/text/wip.py (mahinlma, Nov 23, 2021)
e5d7a1c - edit distance updated (mahinlma, Nov 23, 2021)
d52068f - Merge branch 'master' of https://github.com/mahinlma/metrics (mahinlma, Nov 23, 2021)
419738a - [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Nov 23, 2021)
9b0cdfa - issue fix (mahinlma, Nov 23, 2021)
0b06b3f - Merge branch 'master' of https://github.com/mahinlma/metrics (mahinlma, Nov 23, 2021)
e968bb7 - Merge branch 'master' into master (Borda, Nov 23, 2021)
d89b2be - update (mahinlma, Nov 24, 2021)
a912eb1 - test update (mahinlma, Nov 24, 2021)
542aa65 - [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Nov 24, 2021)
d4e2c37 - removed unused imports (mahinlma, Nov 24, 2021)
8564d6f - Merge branch 'master' of https://github.com/mahinlma/metrics (mahinlma, Nov 24, 2021)
f228ad0 - Apply suggestions from code review (SkafteNicki, Nov 24, 2021)
121effd - Merge branch 'master' into master (Borda, Nov 24, 2021)
a274e64 - Apply suggestions from code review (Borda, Nov 24, 2021)
0bab613 - [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Nov 24, 2021)
255ff7f - Merge branch 'master' into master (Borda, Nov 24, 2021)
0137345 - Merge branch 'master' into master (mergify[bot], Nov 24, 2021)
e6d5470 - Update torchmetrics/functional/text/wip.py (SkafteNicki, Nov 24, 2021)
ebd2f8a - Update wip.py (mahinlma, Nov 24, 2021)
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Added NLP metrics:
- `MatchErrorRate` ([#619](https://github.com/PyTorchLightning/metrics/pull/619))
- `WordInfoLost` and `WordInfoPreserved` ([#630](https://github.com/PyTorchLightning/metrics/pull/630))


- Added `MinMaxMetric` to wrappers ([#556](https://github.com/PyTorchLightning/metrics/pull/556))
12 changes: 12 additions & 0 deletions docs/source/references/functional.rst
@@ -463,3 +463,15 @@ wer [func]

.. autofunction:: torchmetrics.functional.wer
:noindex:

word_information_lost [func]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torchmetrics.functional.word_information_lost
:noindex:

word_information_preserved [func]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torchmetrics.functional.word_information_preserved
:noindex:
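
For orientation, a minimal sketch of how the two functional metrics documented above could be called. This is illustrative only and not part of the diff: the variable names are hypothetical, and the argument order (predictions first, references second) is assumed from INPUT_ORDER.PREDS_FIRST in the tests later in this PR.

# Illustrative only: names are hypothetical; call order assumed from the tests below.
from torchmetrics.functional import word_information_lost, word_information_preserved

predictions = ["hello world", "what a day"]
references = ["hello world", "what a wonderful day"]

wil_score = word_information_lost(predictions, references)  # tensor in [0, 1]; 0 means nothing lost
wip_score = word_information_preserved(predictions, references)  # tensor in [0, 1]; 1 means everything preserved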
12 changes: 12 additions & 0 deletions docs/source/references/modules.rst
@@ -646,6 +646,18 @@ WER
.. autoclass:: torchmetrics.WER
:noindex:

WordInfoLost
~~~~~~~~~~~~

.. autoclass:: torchmetrics.WordInfoLost
:noindex:

WordInfoPreserved
~~~~~~~~~~~~~~~~~

.. autoclass:: torchmetrics.WordInfoPreserved
:noindex:
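
Similarly, an illustrative sketch of the module interface documented above; it assumes the standard torchmetrics update()/compute() accumulation pattern and uses hypothetical variable names rather than anything taken from this diff.

from torchmetrics import WordInfoLost, WordInfoPreserved

# Assumed usage: state accumulates across update() calls, compute() returns a scalar tensor.
wil_metric = WordInfoLost()
wip_metric = WordInfoPreserved()
for preds, refs in [(["hello world"], ["hello world"]), (["what a day"], ["what a wonderful day"])]:
    wil_metric.update(preds, refs)
    wip_metric.update(preds, refs)
total_wil = wil_metric.compute()
total_wip = wip_metric.compute()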


********
Wrappers
Expand Down
70 changes: 70 additions & 0 deletions tests/text/test_wil.py
@@ -0,0 +1,70 @@
from typing import List, Union

import pytest
from jiwer import wil

from tests.text.helpers import INPUT_ORDER, TextTester
from torchmetrics.functional.text.wil import word_information_lost
from torchmetrics.text.wil import WordInfoLost
from torchmetrics.utilities.imports import _JIWER_AVAILABLE

BATCHES_1 = {"preds": [["hello world"], ["what a day"]], "targets": [["hello world"], ["what a wonderful day"]]}

BATCHES_2 = {
    "preds": [
        ["i like python", "what you mean or swallow"],
        ["hello duck", "i like python"],
    ],
    "targets": [
        ["i like monthy python", "what do you mean, african or european swallow"],
        ["hello world", "i like monthy python"],
    ],
}


def _compute_wil_metric_jiwer(prediction: Union[str, List[str]], reference: Union[str, List[str]]):
    return wil(reference, prediction)


@pytest.mark.skipif(not _JIWER_AVAILABLE, reason="test requires jiwer")
@pytest.mark.parametrize(
    ["preds", "targets"],
    [
        pytest.param(BATCHES_1["preds"], BATCHES_1["targets"]),
        pytest.param(BATCHES_2["preds"], BATCHES_2["targets"]),
    ],
)
class TestWordInfoLost(TextTester):
    @pytest.mark.parametrize("ddp", [False, True])
    @pytest.mark.parametrize("dist_sync_on_step", [False, True])
    def test_wil_class(self, ddp, dist_sync_on_step, preds, targets):

        self.run_class_metric_test(
            ddp=ddp,
            preds=preds,
            targets=targets,
            metric_class=WordInfoLost,
            sk_metric=_compute_wil_metric_jiwer,
            dist_sync_on_step=dist_sync_on_step,
            input_order=INPUT_ORDER.PREDS_FIRST,
        )

    def test_wil_functional(self, preds, targets):

        self.run_functional_metric_test(
            preds,
            targets,
            metric_functional=word_information_lost,
            sk_metric=_compute_wil_metric_jiwer,
            input_order=INPUT_ORDER.PREDS_FIRST,
        )

    def test_wil_differentiability(self, preds, targets):

        self.run_differentiability_test(
            preds=preds,
            targets=targets,
            metric_module=WordInfoLost,
            metric_functional=word_information_lost,
            input_order=INPUT_ORDER.PREDS_FIRST,
        )
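
These tests use jiwer as the reference implementation, with the argument order flipped: jiwer.wil takes the reference first, while the torchmetrics functional is assumed to take predictions first (INPUT_ORDER.PREDS_FIRST). A standalone sanity check along the same lines, assuming jiwer is installed, might look like:

from jiwer import wil
from torchmetrics.functional import word_information_lost

preds = ["hello world", "what a day"]
targets = ["hello world", "what a wonderful day"]

# jiwer expects (truth, hypothesis); the torchmetrics functional is fed predictions first.
expected = wil(targets, preds)
actual = word_information_lost(preds, targets)
assert abs(float(actual) - expected) < 1e-6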
70 changes: 70 additions & 0 deletions tests/text/test_wip.py
@@ -0,0 +1,70 @@
from typing import List, Union

import pytest
from jiwer import wip

from tests.text.helpers import INPUT_ORDER, TextTester
from torchmetrics.functional.text.wip import word_information_preserved
from torchmetrics.text.wip import WordInfoPreserved
from torchmetrics.utilities.imports import _JIWER_AVAILABLE

BATCHES_1 = {"preds": [["hello world"], ["what a day"]], "targets": [["hello world"], ["what a wonderful day"]]}

BATCHES_2 = {
    "preds": [
        ["i like python", "what you mean or swallow"],
        ["hello duck", "i like python"],
    ],
    "targets": [
        ["i like monthy python", "what do you mean, african or european swallow"],
        ["hello world", "i like monthy python"],
    ],
}


def _compute_wip_metric_jiwer(prediction: Union[str, List[str]], reference: Union[str, List[str]]):
    return wip(reference, prediction)


@pytest.mark.skipif(not _JIWER_AVAILABLE, reason="test requires jiwer")
@pytest.mark.parametrize(
    ["preds", "targets"],
    [
        pytest.param(BATCHES_1["preds"], BATCHES_1["targets"]),
        pytest.param(BATCHES_2["preds"], BATCHES_2["targets"]),
    ],
)
class TestWordInfoPreserved(TextTester):
    @pytest.mark.parametrize("ddp", [False, True])
    @pytest.mark.parametrize("dist_sync_on_step", [False, True])
    def test_wip_class(self, ddp, dist_sync_on_step, preds, targets):

        self.run_class_metric_test(
            ddp=ddp,
            preds=preds,
            targets=targets,
            metric_class=WordInfoPreserved,
            sk_metric=_compute_wip_metric_jiwer,
            dist_sync_on_step=dist_sync_on_step,
            input_order=INPUT_ORDER.PREDS_FIRST,
        )

    def test_wip_functional(self, preds, targets):

        self.run_functional_metric_test(
            preds,
            targets,
            metric_functional=word_information_preserved,
            sk_metric=_compute_wip_metric_jiwer,
            input_order=INPUT_ORDER.PREDS_FIRST,
        )

    def test_wip_differentiability(self, preds, targets):

        self.run_differentiability_test(
            preds=preds,
            targets=targets,
            metric_module=WordInfoPreserved,
            metric_functional=word_information_preserved,
            input_order=INPUT_ORDER.PREDS_FIRST,
        )
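
Word Information Preserved is the complement of Word Information Lost (WIL = 1 - WIP), which is why this file mirrors test_wil.py. A quick check of that identity against jiwer, assuming the installed version follows the standard definition, could be:

from jiwer import wil, wip

preds = ["hello duck", "i like python"]
targets = ["hello world", "i like monthy python"]

# Standard definition: WIL = 1 - WIP, so the two jiwer scores should sum to 1.
assert abs(wil(targets, preds) + wip(targets, preds) - 1.0) < 1e-8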
4 changes: 4 additions & 0 deletions torchmetrics/__init__.py
@@ -74,6 +74,8 @@
MatchErrorRate,
ROUGEScore,
SacreBLEUScore,
WordInfoLost,
WordInfoPreserved,
)
from torchmetrics.wrappers import BootStrapper, MetricTracker, MinMaxMetric, MultioutputWrapper # noqa: E402

@@ -152,4 +154,6 @@
"WER",
"CharErrorRate",
"MatchErrorRate",
"WordInfoLost",
"WordInfoPreserved",
]
4 changes: 4 additions & 0 deletions torchmetrics/functional/__init__.py
@@ -72,6 +72,8 @@
from torchmetrics.functional.text.rouge import rouge_score
from torchmetrics.functional.text.sacre_bleu import sacre_bleu_score
from torchmetrics.functional.text.wer import wer
from torchmetrics.functional.text.wil import word_information_lost
from torchmetrics.functional.text.wip import word_information_preserved

__all__ = [
"accuracy",
@@ -137,4 +139,6 @@
"wer",
"char_error_rate",
"match_error_rate",
"word_information_lost",
"word_information_preserved",
]
2 changes: 2 additions & 0 deletions torchmetrics/functional/text/__init__.py
@@ -17,3 +17,5 @@
from torchmetrics.functional.text.mer import match_error_rate # noqa: F401
from torchmetrics.functional.text.sacre_bleu import sacre_bleu_score # noqa: F401
from torchmetrics.functional.text.wer import wer # noqa: F401
from torchmetrics.functional.text.wil import word_information_lost # noqa: F401
from torchmetrics.functional.text.wip import word_information_preserved # noqa: F401
31 changes: 5 additions & 26 deletions torchmetrics/functional/text/cer.py
@@ -17,28 +17,7 @@
import torch
from torch import Tensor, tensor


def _edit_distance(prediction_tokens: List[str], reference_tokens: List[str]) -> int:
    """Standard dynamic programming algorithm to compute the edit distance.

    Args:
        prediction_tokens: A tokenized predicted sentence
        reference_tokens: A tokenized reference sentence
    Returns:
        (int) Edit distance between the predicted sentence and the reference sentence
    """
    dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)]
    for i in range(len(prediction_tokens) + 1):
        dp[i][0] = i
    for j in range(len(reference_tokens) + 1):
        dp[0][j] = j
    for i in range(1, len(prediction_tokens) + 1):
        for j in range(1, len(reference_tokens) + 1):
            if prediction_tokens[i - 1] == reference_tokens[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + 1
    return dp[-1][-1]
from torchmetrics.functional.text.helper import _edit_distance


def _cer_update(
@@ -51,8 +30,8 @@ def _cer_update(
predictions: Transcription(s) to score as a string or list of strings
references: Reference(s) for each speech input as a string or list of strings
Returns:
(Tensor) Number of edit operations to get from the reference to the prediction, summed over all samples
(Tensor) Number of character over all references
Number of edit operations to get from the reference to the prediction, summed over all samples
Number of character overall references
"""
if isinstance(predictions, str):
predictions = [predictions]
@@ -75,7 +54,7 @@ def _cer_compute(errors: Tensor, total: Tensor) -> Tensor:
errors: Number of edit operations to get from the reference to the prediction, summed over all samples
total: Number of characters over all references
Returns:
(Tensor) Character error rate
Character error rate score
"""
return errors / total

@@ -91,7 +70,7 @@ def char_error_rate(
predictions: Transcription(s) to score as a string or list of strings
references: Reference(s) for each speech input as a string or list of strings
Returns:
(Tensor) Character error rate
Character error rate score
Examples:
>>> predictions = ["this is the prediction", "there is an other sample"]
>>> references = ["this is the reference", "there is another one"]
38 changes: 38 additions & 0 deletions torchmetrics/functional/text/helper.py
@@ -0,0 +1,38 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List


def _edit_distance(prediction_tokens: List[str], reference_tokens: List[str]) -> int:
"""Standard dynamic programming algorithm to compute the edit distance.

Args:
prediction_tokens: A tokenized predicted sentence
reference_tokens: A tokenized reference sentence
Returns:
(int) Edit distance between the predicted sentence and the reference sentence
"""
dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)]
for i in range(len(prediction_tokens) + 1):
dp[i][0] = i
for j in range(len(reference_tokens) + 1):
dp[0][j] = j
for i in range(1, len(prediction_tokens) + 1):
for j in range(1, len(reference_tokens) + 1):
if prediction_tokens[i - 1] == reference_tokens[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + 1
return dp[-1][-1]
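
The shared helper is the standard Levenshtein dynamic program over word tokens, now used by both cer.py and mer.py. A small illustrative check of its behaviour (hypothetical inputs, not part of this PR's test suite):

from torchmetrics.functional.text.helper import _edit_distance

# "what a day" vs "what a wonderful day": a single insertion, so the distance is 1.
assert _edit_distance("what a day".split(), "what a wonderful day".split()) == 1
# Identical token sequences have distance 0.
assert _edit_distance("hello world".split(), "hello world".split()) == 0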
29 changes: 4 additions & 25 deletions torchmetrics/functional/text/mer.py
@@ -17,27 +17,7 @@
import torch
from torch import Tensor, tensor


def _edit_distance(prediction_tokens: List[str], reference_tokens: List[str]) -> int:
    """Standard dynamic programming algorithm to compute the edit distance.

    Args:
        prediction_tokens: A tokenized predicted sentence
        reference_tokens: A tokenized reference sentence

    Returns:
        Editing distance between the predicted sentence and the reference sentence
    """
    dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)]
    dp[:][0] = list(range(len(prediction_tokens) + 1))
    dp[0][:] = list(range(len(reference_tokens) + 1))
    for i in range(1, len(prediction_tokens) + 1):
        for j in range(1, len(reference_tokens) + 1):
            if prediction_tokens[i - 1] == reference_tokens[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + 1
    return dp[-1][-1]
from torchmetrics.functional.text.helper import _edit_distance


def _mer_update(
@@ -52,7 +32,7 @@ def _mer_update(

Returns:
Number of edit operations to get from the reference to the prediction, summed over all samples
Number of words over all references
Number of words overall references
"""
if isinstance(predictions, str):
predictions = [predictions]
@@ -74,10 +54,10 @@ def _mer_compute(errors: Tensor, total: Tensor) -> Tensor:

Args:
errors: Number of edit operations to get from the reference to the prediction, summed over all samples
total: Number of words over all references
total: Number of words overall references

Returns:
(Tensor) Match error rate
Match error rate score
"""
return errors / total

@@ -94,7 +74,6 @@ def match_error_rate(
predictions: Transcription(s) to score as a string or list of strings
references: Reference(s) for each speech input as a string or list of strings


Returns:
Match error rate score
