Lightning-AI · Borda · Jan 10, 2022 · Dec 7, 2021 · Dec 7, 2021 · Dec 7, 2021
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  - `SQuAD` ([#623](https://github.com/PyTorchLightning/metrics/pull/623))
  - `CHRFScore` ([#641](https://github.com/PyTorchLightning/metrics/pull/641))
  - `TranslationEditRate` ([#646](https://github.com/PyTorchLightning/metrics/pull/646))
+ - `ExtendedEditDistance` ([#668](https://github.com/PyTorchLightning/metrics/pull/668))
+
 
 - Added `MultiScaleSSIM` into image metrics ([#679](https://github.com/PyTorchLightning/metrics/pull/679))
 

@@ -76,4 +76,5 @@
 .. _chrF score: https://aclanthology.org/W15-3049.pdf
 .. _chrF++ score: https://aclanthology.org/W17-4770.pdf
 .. _TER: https://aclanthology.org/2006.amta-papers.25.pdf
+.. _ExtendedEditDistance: https://aclanthology.org/W19-5359.pdf
 .. _MultiScaleSSIM: https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf
@@ -460,6 +460,12 @@ chrf_score [func]
 .. autofunction:: torchmetrics.functional.chrf_score
  :noindex:
 
+extended_edit_distance [func]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: torchmetrics.functional.extended_edit_distance
+ :noindex:
+
 match_error_rate [func]
 ~~~~~~~~~~~~~~~~~~~~~~~
 

@@ -642,6 +642,12 @@ CHRFScore
 .. autoclass:: torchmetrics.CHRFScore
  :noindex:
 
+ExtendedEditDistance
+~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: torchmetrics.ExtendedEditDistance
+ :noindex:
+
 MatchErrorRate
 ~~~~~~~~~~~~~~
 

@@ -63,3 +63,7 @@
 _inputs_error_rate_batch_size_1 = Input(**ERROR_RATES_BATCHES_1)
 
 _inputs_error_rate_batch_size_2 = Input(**ERROR_RATES_BATCHES_2)
+
+# single reference
+TUPLE_OF_SINGLE_REFERENCES = (((REFERENCE_1A), (REFERENCE_1B)), ((REFERENCE_1B), (REFERENCE_1C)))
+_inputs_single_reference = Input(preds=TUPLE_OF_HYPOTHESES, targets=TUPLE_OF_SINGLE_REFERENCES)
@@ -0,0 +1,120 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import partial
+
+import pytest
+from torch import Tensor, tensor
+
+from tests.text.helpers import TextTester
+from tests.text.inputs import _inputs_single_reference, _inputs_single_sentence_multiple_references
+from torchmetrics.functional.text.eed import extended_edit_distance
+from torchmetrics.text.eed import ExtendedEditDistance
+
+
+def rwth_manual_metric(preds, targets) -> Tensor:
+ """Return hard-coded reference EED scores for the examples defined in `tests.text.inputs`.
+
+ The stored values were obtained by running the script from https://github.com/rwth-i6/ExtendedEditDistance
+ on those examples; nothing is computed from the arguments here.
+
+ Args:
+ preds: hypotheses; only their number and whether hypothesis A is among them are inspected.
+ targets: references; unused in this function.
+
+ Returns:
+ The mean of both stored scores when ``len(preds) == 4``, the first stored score when hypothesis A is
+ contained in ``preds``, and the second stored score otherwise.
+ """
+ # Reference scores produced by the RWTH script (first: hypothesis A, second: the other hypothesis)
+ ans_1 = tensor(0.24248056001808083)
+ ans_2 = tensor(0.19152276295133436)
+
+ # Literal text used to recognise hypothesis A inside `preds`
+ HYPOTHESIS_A = "It is a guide to action which ensures that the military always obeys the commands of the party"
+
+ # If hypothesis A and B are in preds, the average of ans_1 and ans_2 is given
+ # (this length check runs first, so a 4-element `preds` never reaches the membership test below)
+ if len(preds) == 4:
+ return (ans_1 + ans_2) / 2
+ # If only hypothesis A or B are given, ans_1 and ans_2 are given, respectively
+ if HYPOTHESIS_A in preds:
+ return ans_1
+ return ans_2
+
+
+# Every test method of the class receives the single-reference fixture as `preds` / `targets`.
+@pytest.mark.parametrize(
+ ["preds", "targets"],
+ [(_inputs_single_reference.preds, _inputs_single_reference.targets)],
+)
+class TestExtendedEditDistance(TextTester):
+ """Run the `TextTester` helper checks for `ExtendedEditDistance` and `extended_edit_distance`."""
+ @pytest.mark.parametrize("ddp", [False, True])
+ @pytest.mark.parametrize("dist_sync_on_step", [False, True])
+ def test_eed_class(self, preds, targets, ddp, dist_sync_on_step):
+ """Check the module metric against `rwth_manual_metric` for each `ddp` / `dist_sync_on_step` combination."""
+ # NOTE(review): partial() binds no arguments here, so this is equivalent to passing rwth_manual_metric itself
+ rwth_metric = partial(rwth_manual_metric)
+ self.run_class_metric_test(
+ ddp=ddp,
+ preds=preds,
+ targets=targets,
+ metric_class=ExtendedEditDistance,
+ sk_metric=rwth_metric,
+ dist_sync_on_step=dist_sync_on_step,
+ )
+
+ def test_eed_functional(self, preds, targets):
+ """Check the functional metric against `rwth_manual_metric`."""
+ # NOTE(review): partial() binds no arguments here, so this is equivalent to passing rwth_manual_metric itself
+ rwth_metric = partial(rwth_manual_metric)
+ self.run_functional_metric_test(
+ preds,
+ targets,
+ metric_functional=extended_edit_distance,
+ sk_metric=rwth_metric,
+ )
+
+ def test_eed_differentiability(self, preds, targets):
+ """Hand both the module and the functional metric to `TextTester.run_differentiability_test`."""
+ self.run_differentiability_test(
+ preds=preds,
+ targets=targets,
+ metric_module=ExtendedEditDistance,
+ metric_functional=extended_edit_distance,
+ )
+
+
+# test blank edge cases
+def test_eed_empty_functional():
+ hyp = []
+ ref = [[]]
+ assert extended_edit_distance(hyp, ref) == tensor(0.0)
+
+
+def test_eed_empty_class():
+ eed_metric = ExtendedEditDistance()
+ hyp = []
+ ref = [[]]
+ assert eed_metric(hyp, ref) == tensor(0.0)
+
+
+def test_eed_empty_with_non_empty_hyp_functional():
+ hyp = ["python"]
+ ref = [[]]
+ assert extended_edit_distance(hyp, ref) == tensor(0.0)
+
+
+def test_eed_empty_with_non_empty_hyp_class():
+ eed_metric = ExtendedEditDistance()
+ hyp = ["python"]
+ ref = [[]]
+ assert eed_metric(hyp, ref) == tensor(0.0)
+
+
+def test_eed_return_sentence_level_score_functional():
+ hyp = _inputs_single_sentence_multiple_references.preds
+ ref = _inputs_single_sentence_multiple_references.targets
+ _, sentence_eed = extended_edit_distance(hyp, ref, return_sentence_level_score=True)
+ isinstance(sentence_eed, Tensor)
+
+
+def test_eed_return_sentence_level_class():
+ metric = ExtendedEditDistance(return_sentence_level_score=True)
+ hyp = _inputs_single_sentence_multiple_references.preds
+ ref = _inputs_single_sentence_multiple_references.targets
+ _, sentence_eed = metric(hyp, ref)
+ isinstance(sentence_eed, Tensor)
@@ -86,6 +86,7 @@
  BLEUScore,
  CharErrorRate,
  CHRFScore,
+ ExtendedEditDistance,
  MatchErrorRate,
  SacreBLEUScore,
  SQuAD,
@@ -115,6 +116,7 @@
  "CosineSimilarity",
  "TweedieDevianceScore",
  "ExplainedVariance",
+ "ExtendedEditDistance",
  "F1",
  "F1Score",
  "FBeta",

@@ -69,6 +69,7 @@
 from torchmetrics.functional.text.bleu import bleu_score
 from torchmetrics.functional.text.cer import char_error_rate
 from torchmetrics.functional.text.chrf import chrf_score
+from torchmetrics.functional.text.eed import extended_edit_distance
 from torchmetrics.functional.text.mer import match_error_rate
 from torchmetrics.functional.text.rouge import rouge_score
 from torchmetrics.functional.text.sacre_bleu import sacre_bleu_score
@@ -93,6 +94,7 @@
  "tweedie_deviance_score",
  "dice_score",
  "explained_variance",
+ "extended_edit_distance",
  "f1",
  "f1_score",
  "fbeta",