Alignments and a CLI interface #72

Merged
merged 15 commits on Mar 14, 2023
13 changes: 7 additions & 6 deletions .github/workflows/pythonpackage.yml
@@ -17,9 +17,9 @@ jobs:
         python-version: [3.7]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install Poetry
@@ -31,9 +31,9 @@ jobs:
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
-          poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          poetry run flake8 jiwer --count --select=E9,F63,F7,F82 --show-source --statistics
           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
+          poetry run flake8 jiwer --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
       - name: Check formatting with black
         run: |
           poetry run black . --check
@@ -45,9 +45,9 @@
         python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install Poetry
@@ -59,4 +59,5 @@ jobs:
           poetry install
       - name: Test with pytest
         run: |
+          poetry run python --version
           poetry run pytest
4 changes: 2 additions & 2 deletions .github/workflows/pythonpublish.yml
@@ -13,9 +13,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
         with:
           python-version: '3.x'
       - name: Install Poetry
88 changes: 87 additions & 1 deletion README.md
@@ -2,7 +2,7 @@
 
 This repository contains a simple python package to approximate the Word Error Rate (WER), Match Error Rate (MER), Word Information Lost (WIL) and Word Information Preserved (WIP) of a transcript.
 It computes the minimum-edit distance between the ground-truth sentence and the hypothesis sentence of a speech-to-text API.
-The minimum-edit distance is calculated using the Python C module [Levenshtein](https://github.com/maxbachmann/Levenshtein).
+The minimum-edit distance is calculated using [RapidFuzz](https://github.com/maxbachmann/RapidFuzz), which uses C++ under the hood.
 
 _For a comparison between WER, MER and WIL, see: \
 Morris, Andrew & Maier, Viktoria & Green, Phil. (2004). [From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition.](https://www.researchgate.net/publication/221478089_From_WER_and_RIL_to_MER_and_WIL_improved_evaluation_measures_for_connected_speech_recognition)_
@@ -75,6 +75,92 @@ hypothesis = ["i kan cpell", "i hop"]
error = cer(ground_truth, hypothesis)
```

# alignment

With `jiwer.compute_measures`, you also get the alignment between the ground-truth and hypothesis.

The alignment of each sentence pair is given as a list of operations, where each operation is a tuple
`(op, truth_idx_start, truth_idx_end, hyp_idx_start, hyp_idx_end)` and `op` is one of
`equal`, `replace`, `delete`, or `insert`.

This looks like the following:

```python3
import jiwer

out = jiwer.compute_measures("short one here", "shoe order one")
print(out['ops'])
# [[('insert', 0, 0, 0, 1), ('replace', 0, 1, 1, 2), ('equal', 1, 2, 2, 3), ('delete', 2, 3, 3, 3)]]
```
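
The index pairs are half-open token ranges. As a minimal sketch (not part of the JiWER API), you can use these tuples to recover which words each edit touches; this assumes the `out` dictionary from the snippet above:

```python3
import jiwer

out = jiwer.compute_measures("short one here", "shoe order one")

# "truth" and "hypothesis" hold the tokenized sentences, "ops" holds one
# alignment (a list of op tuples) per sentence pair
truth, hyp = out["truth"][0], out["hypothesis"][0]

for op, t0, t1, h0, h1 in out["ops"][0]:
    # each op names the edit and the half-open token ranges it covers;
    # "delete" covers no hypothesis tokens, "insert" no ground-truth tokens
    print(f"{op:>7}: {truth[t0:t1]} -> {hyp[h0:h1]}")
# prints something like:
#  insert: [] -> ['shoe']
# replace: ['short'] -> ['order']
#   equal: ['one'] -> ['one']
#  delete: ['here'] -> []
```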

To visualize the alignment, you can use `jiwer.visualize_measures()`.

For example:

```python3
import jiwer

out = jiwer.compute_measures(
["short one here", "quite a bit of longer sentence"],
["shoe order one", "quite bit of an even longest sentence here"],
)

print(jiwer.visualize_measures(out))
```
This gives the following output:
```text
sentence 1
REF: # short one here
HYP: shoe order one *
I S D

sentence 2
REF: quite a bit of # # longer sentence #
HYP: quite * bit of an even longest sentence here
D I I S I

number of sentences: 2
substitutions=2 deletions=2 insertions=4 hits=5

mer=61.54%
wil=74.75%
wip=25.25%
wer=88.89%
```
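
Here a `#` in the REF row marks a hypothesis word with no ground-truth counterpart (an insertion), and a `*` in the HYP row marks a ground-truth word that is missing from the hypothesis (a deletion). The measures follow from the counts: the ground truth contains 9 words in total, so wer = (2 + 2 + 4) / 9 ≈ 88.89%.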

Note that you can also visualize the character-level alignment: call `jiwer.cer()` with `return_dict=True` and pass the result to `jiwer.visualize_measures()` with `visualize_cer=True`, as in the example below.
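
A sketch (the `ground_truth` sentences are assumed to match the earlier CER snippet):

```python3
import jiwer

ground_truth = ["i can spell", "i hope"]
hypothesis = ["i kan cpell", "i hop"]

# return_dict=True makes cer() return the full measure dictionary,
# including the character-level alignment
out = jiwer.cer(ground_truth, hypothesis, return_dict=True)
print(jiwer.visualize_measures(out, visualize_cer=True))
```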

# command-line interface

JiWER provides a simple CLI, which should be available after installation.

For details, see `jiwer --help`.

```text
$ jiwer --help
Usage: jiwer [OPTIONS]

JiWER is a python tool and API for computing the word-error-rate of ASR systems. To
use this CLI, store the ground-truth and hypothesis sentences in a text
file, where each sentence is delimited by a new-line character. The text
files are expected to have an equal number of lines, unless the `-g` flag
is used. The `-g` flag joins the computation of the WER by doing a global
alignment.

Options:
--gt PATH Path to new-line delimited text file of ground-truth
sentences. [required]
--hp PATH Path to new-line delimited text file of hypothesis sentences.
[required]
-c, --cer Compute CER instead of WER.
-a, --align Print alignment of each sentence.
-g, --global Apply a global alignment between ground-truth and hypothesis
sentences before computing the WER.
--help Show this message and exit.
```
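
For example, with two hypothetical files `ref.txt` and `hyp.txt` containing one sentence per line, invocations could look like:

```text
$ jiwer --gt ref.txt --hp hyp.txt            # word error rate
$ jiwer --gt ref.txt --hp hyp.txt --cer      # character error rate instead
$ jiwer --gt ref.txt --hp hyp.txt --align    # also print each alignment
```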

Note that the CLI does not support custom pre-processing (as described below). When using the CLI,
any pre-processing should be applied to the text files manually before calling JiWER, for example as sketched next.
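
As an illustration (the file names are hypothetical), the normalization could be done with JiWER's own transformations in a small Python script before invoking the CLI:

```python3
import jiwer

# a typical normalization pipeline; adjust to your data
transform = jiwer.Compose(
    [
        jiwer.ToLowerCase(),
        jiwer.RemovePunctuation(),
        jiwer.RemoveMultipleSpaces(),
        jiwer.Strip(),
    ]
)

for name in ["ref.txt", "hyp.txt"]:
    with open(name) as f:
        sentences = [transform(line.rstrip("\n")) for line in f]
    with open(name.replace(".txt", "_clean.txt"), "w") as f:
        f.write("\n".join(sentences))
```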

# pre-processing

It might be necessary to apply some pre-processing steps on either the hypothesis or
1 change: 1 addition & 0 deletions jiwer/__init__.py
@@ -1,5 +1,6 @@
 from .measures import *
 from .transforms import *
 from .transformations import *
+from .alignment import *
 
 name = "jiwer"
116 changes: 116 additions & 0 deletions jiwer/alignment.py
@@ -0,0 +1,116 @@
#
# JiWER - Jitsi Word Error Rate
#
# Copyright @ 2018 - present 8x8, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Utility method to visualize the alignment as returned by `jiwer.compute_measures()`.
"""

from typing import Dict, List, Tuple

__all__ = ["visualize_measures"]


def visualize_measures(measure_output: Dict, visualize_cer: bool = False) -> str:
    """
    Given the output dictionary of `jiwer.compute_measures()`, construct a string
    which visualizes the alignment between each pair of ground-truth and
    hypothesis sentences.

    The constructed string also includes the values of all measures. If
    `visualize_cer` is set, the output dictionary is expected to have come from
    the `jiwer.cer()` method instead.
    """
    if visualize_cer and "cer" not in measure_output:
        raise ValueError(
            f"visualize_cer={visualize_cer} while measure dictionary does not contain CER"
        )
    if not visualize_cer and "cer" in measure_output:
        raise ValueError(
            f"visualize_cer={visualize_cer} while measure dictionary contains CER"
        )

    truth = measure_output["truth"]
    hypo = measure_output["hypothesis"]
    ops = measure_output["ops"]

    # one comparison block per ground-truth/hypothesis pair
    final_str = ""
    for idx, (gt, hp, o) in enumerate(zip(truth, hypo, ops)):
        final_str += f"sentence {idx+1}\n"
        final_str += _construct_comparison_string(
            gt, hp, o, visualize_cer=visualize_cer
        )
        final_str += "\n"

    # summary of the edit-operation counts over all sentences
    final_str += f"number of sentences: {len(ops)}\n"
    final_str += f"substitutions={measure_output['substitutions']} "
    final_str += f"deletions={measure_output['deletions']} "
    final_str += f"insertions={measure_output['insertions']} "
    final_str += f"hits={measure_output['hits']}\n"

    if visualize_cer:
        final_str += f"\ncer={measure_output['cer']*100:.2f}%"
    else:
        final_str += f"\nmer={measure_output['mer']*100:.2f}%"
        final_str += f"\nwil={measure_output['wil']*100:.2f}%"
        final_str += f"\nwip={measure_output['wip']*100:.2f}%"
        final_str += f"\nwer={measure_output['wer']*100:.2f}%"

    return final_str


def _construct_comparison_string(
    truth: List[str],
    hypothesis: List[str],
    ops: List[Tuple[str, int, int, int, int]],
    visualize_cer: bool = False,
) -> str:
    ref_str = "REF: "
    hyp_str = "HYP: "
    op_str = "     "  # aligns with the "REF: "/"HYP: " prefixes

    for op in ops:
        name, gt_start, gt_end, hp_start, hp_end = op

        if name == "equal" or name == "replace":
            ref = truth[gt_start:gt_end]
            hyp = hypothesis[hp_start:hp_end]
            op_char = " " if name == "equal" else "s"
        elif name == "delete":
            # deleted words exist only in the ground truth, so pad the
            # hypothesis row with '*' placeholders
            ref = truth[gt_start:gt_end]
            hyp = ["*" for _ in range(len(ref))]
            op_char = "d"
        elif name == "insert":
            # inserted words exist only in the hypothesis, so pad the
            # reference row with '#' placeholders
            hyp = hypothesis[hp_start:hp_end]
            ref = ["#" for _ in range(len(hyp))]
            op_char = "i"
        else:
            raise ValueError(f"unparseable op name={name}")

        # pad each aligned (reference, hypothesis, op) triple to a common
        # width so the three rows line up column by column
        op_chars = [op_char for _ in range(len(ref))]
        for gt, hp, c in zip(ref, hyp, op_chars):
            str_len = max(len(gt), len(hp), len(c))

            ref_str += f"{gt:>{str_len}}"
            hyp_str += f"{hp:>{str_len}}"
            op_str += f"{c.upper():>{str_len}}"

            if not visualize_cer:
                # words are separated by a space; characters need no separator
                ref_str += " "
                hyp_str += " "
                op_str += " "

    return f"{ref_str}\n{hyp_str}\n{op_str}\n"