Skip to content

Commit

Permalink
Enabling char_level and compute_CER for aishell recipe (#1554)
Browse files Browse the repository at this point in the history
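* Initial fix: pass char_level=True to store_transcripts and compute_CER=True to write_error_stats in the aishell decode scripts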

Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
  • Loading branch information
JinZr and csukuangfj authored Mar 18, 2024
1 parent 2dfd5db commit bf2f943
Show file tree
Hide file tree
Showing 13 changed files with 80 additions and 26 deletions.
8 changes: 6 additions & 2 deletions egs/aishell/ASR/conformer_ctc/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
if enable_log:
logging.info(f"The transcripts are stored in {recog_path}")

Expand All @@ -432,7 +432,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=enable_log
f,
f"{test_set_name}-{key}",
results_char,
enable_log=enable_log,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/conformer_mmi/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
if enable_log:
logging.info(f"The transcripts are stored in {recog_path}")

Expand All @@ -444,7 +444,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=enable_log
f,
f"{test_set_name}-{key}",
results_char,
enable_log=enable_log,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/pruned_transducer_stateless2/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -402,7 +402,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/pruned_transducer_stateless3/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -538,7 +538,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/pruned_transducer_stateless7/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,15 +444,19 @@ def save_results(
for res in results:
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))

store_transcripts(filename=recog_path, texts=results_char)
store_transcripts(filename=recog_path, texts=results_char, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
# ref/hyp pairs.
errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -594,7 +594,11 @@ def save_results(

with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
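8 changes: 6 additions & 2 deletions (file path not shown on the page; presumably egs/aishell/ASR/pruned_transducer_stateless7_streaming/decode.py)
Original file line number Diff line number Diff line change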
Expand Up @@ -492,15 +492,19 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
# ref/hyp pairs.
errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results, enable_log=True
f,
f"{test_set_name}-{key}",
results,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
10 changes: 8 additions & 2 deletions egs/aishell/ASR/tdnn_lstm_ctc/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -289,7 +289,13 @@ def save_results(
for res in results:
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(f, f"{test_set_name}-{key}", results_char)
wer = write_error_stats(
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

logging.info("Wrote detailed error stats to {}".format(errs_filename))
Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/transducer_stateless/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)

# The following prints out WERs, per-word error statistics and aligned
# ref/hyp pairs.
Expand All @@ -338,7 +338,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/transducer_stateless_modified-2/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -384,7 +384,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/transducer_stateless_modified/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def save_results(
for key, results in results_dict.items():
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -388,7 +388,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=True
f,
f"{test_set_name}-{key}",
results_char,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/whisper/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def save_results(
params.exp_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
)
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
if enable_log:
logging.info(f"The transcripts are stored in {recog_path}")

Expand All @@ -373,7 +373,11 @@ def save_results(
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results_char, enable_log=enable_log
f,
f"{test_set_name}-{key}",
results_char,
enable_log=enable_log,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down
8 changes: 6 additions & 2 deletions egs/aishell/ASR/zipformer/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def save_results(
params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
)
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
store_transcripts(filename=recog_path, texts=results, char_level=True)
logging.info(f"The transcripts are stored in {recog_path}")

# The following prints out WERs, per-word error statistics and aligned
Expand All @@ -570,7 +570,11 @@ def save_results(
)
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results, enable_log=True
f,
f"{test_set_name}-{key}",
results,
enable_log=True,
compute_CER=True,
)
test_set_wers[key] = wer

Expand Down

0 comments on commit bf2f943

Please sign in to comment.