From 23bd455388fa4fa626a22951f7b6cdd11ba0028b Mon Sep 17 00:00:00 2001 From: jinzr Date: Mon, 18 Mar 2024 11:15:44 +0800 Subject: [PATCH 1/2] init fix --- egs/aishell/ASR/conformer_ctc/decode.py | 8 ++++++-- egs/aishell/ASR/conformer_mmi/decode.py | 8 ++++++-- egs/aishell/ASR/pruned_transducer_stateless2/decode.py | 8 ++++++-- egs/aishell/ASR/pruned_transducer_stateless3/decode.py | 8 ++++++-- egs/aishell/ASR/pruned_transducer_stateless7/decode.py | 8 ++++++-- .../ASR/pruned_transducer_stateless7_bbpe/decode.py | 8 ++++++-- .../pruned_transducer_stateless7_streaming/decode.py | 8 ++++++-- egs/aishell/ASR/tdnn_lstm_ctc/decode.py | 10 ++++++++-- egs/aishell/ASR/transducer_stateless/decode.py | 8 ++++++-- .../ASR/transducer_stateless_modified-2/decode.py | 8 ++++++-- .../ASR/transducer_stateless_modified/decode.py | 8 ++++++-- egs/aishell/ASR/whisper/decode.py | 8 ++++++-- egs/aishell/ASR/zipformer/decode.py | 8 ++++++-- 13 files changed, 80 insertions(+), 26 deletions(-) diff --git a/egs/aishell/ASR/conformer_ctc/decode.py b/egs/aishell/ASR/conformer_ctc/decode.py index 74a7b59334..2cb476e208 100755 --- a/egs/aishell/ASR/conformer_ctc/decode.py +++ b/egs/aishell/ASR/conformer_ctc/decode.py @@ -419,7 +419,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) if enable_log: logging.info(f"The transcripts are stored in {recog_path}") @@ -432,7 +432,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=enable_log + f, + f"{test_set_name}-{key}", + results_char, + enable_log=enable_log, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/conformer_mmi/decode.py b/egs/aishell/ASR/conformer_mmi/decode.py index 20a855e7fb..a197a455c6 100755 --- a/egs/aishell/ASR/conformer_mmi/decode.py +++ b/egs/aishell/ASR/conformer_mmi/decode.py @@ -431,7 +431,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) if enable_log: logging.info(f"The transcripts are stored in {recog_path}") @@ -444,7 +444,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=enable_log + f, + f"{test_set_name}-{key}", + results_char, + enable_log=enable_log, + cer=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/pruned_transducer_stateless2/decode.py b/egs/aishell/ASR/pruned_transducer_stateless2/decode.py index fb6c7c481c..f41ea6776b 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless2/decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless2/decode.py @@ -390,7 +390,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -402,7 +402,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/pruned_transducer_stateless3/decode.py b/egs/aishell/ASR/pruned_transducer_stateless3/decode.py index 27c64efaa5..3901a330c5 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless3/decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless3/decode.py @@ -526,7 +526,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -538,7 +538,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/decode.py b/egs/aishell/ASR/pruned_transducer_stateless7/decode.py index 696eea9060..d50bccf82e 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/decode.py @@ -444,7 +444,7 @@ def save_results( for res in results: results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) - store_transcripts(filename=recog_path, texts=results_char) + store_transcripts(filename=recog_path, texts=results_char, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -452,7 +452,11 @@ def save_results( errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt" with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py index da90001647..46f542641e 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py @@ -581,7 +581,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -594,7 +594,11 @@ def save_results( with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/decode.py b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/decode.py index 0e783e92b9..61b9290913 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/decode.py @@ -492,7 +492,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -500,7 +500,11 @@ def save_results( errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt" with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results, enable_log=True + f, + f"{test_set_name}-{key}", + results, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/tdnn_lstm_ctc/decode.py b/egs/aishell/ASR/tdnn_lstm_ctc/decode.py index 824ca2a92e..05e52f560f 100755 --- a/egs/aishell/ASR/tdnn_lstm_ctc/decode.py +++ b/egs/aishell/ASR/tdnn_lstm_ctc/decode.py @@ -278,7 +278,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -289,7 +289,13 @@ def save_results( for res in results: results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: - wer = write_error_stats(f, f"{test_set_name}-{key}", results_char) + wer = write_error_stats( + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, + ) test_set_wers[key] = wer logging.info("Wrote detailed error stats to {}".format(errs_filename)) diff --git a/egs/aishell/ASR/transducer_stateless/decode.py b/egs/aishell/ASR/transducer_stateless/decode.py index d23f4f883c..d958a6338c 100755 --- a/egs/aishell/ASR/transducer_stateless/decode.py +++ b/egs/aishell/ASR/transducer_stateless/decode.py @@ -327,7 +327,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) # The following prints out WERs, per-word error statistics and aligned # ref/hyp pairs. @@ -338,7 +338,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/transducer_stateless_modified-2/decode.py b/egs/aishell/ASR/transducer_stateless_modified-2/decode.py index d164b6890b..57f7a8239e 100755 --- a/egs/aishell/ASR/transducer_stateless_modified-2/decode.py +++ b/egs/aishell/ASR/transducer_stateless_modified-2/decode.py @@ -372,7 +372,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -384,7 +384,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/transducer_stateless_modified/decode.py b/egs/aishell/ASR/transducer_stateless_modified/decode.py index 0a7d87fe8d..56f3724ebe 100755 --- a/egs/aishell/ASR/transducer_stateless_modified/decode.py +++ b/egs/aishell/ASR/transducer_stateless_modified/decode.py @@ -376,7 +376,7 @@ def save_results( for key, results in results_dict.items(): recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt" results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -388,7 +388,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=True + f, + f"{test_set_name}-{key}", + results_char, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/whisper/decode.py b/egs/aishell/ASR/whisper/decode.py index 7f841dcb76..c632d0757e 100755 --- a/egs/aishell/ASR/whisper/decode.py +++ b/egs/aishell/ASR/whisper/decode.py @@ -358,7 +358,7 @@ def save_results( params.exp_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" ) results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) if enable_log: logging.info(f"The transcripts are stored in {recog_path}") @@ -373,7 +373,11 @@ def save_results( results_char.append((res[0], list("".join(res[1])), list("".join(res[2])))) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results_char, enable_log=enable_log + f, + f"{test_set_name}-{key}", + results_char, + enable_log=enable_log, + compute_CER=True, ) test_set_wers[key] = wer diff --git a/egs/aishell/ASR/zipformer/decode.py b/egs/aishell/ASR/zipformer/decode.py index 1968904aea..538189e52f 100755 --- a/egs/aishell/ASR/zipformer/decode.py +++ b/egs/aishell/ASR/zipformer/decode.py @@ -560,7 +560,7 @@ def save_results( params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" ) results = sorted(results) - store_transcripts(filename=recog_path, texts=results) + store_transcripts(filename=recog_path, texts=results, char_level=True) logging.info(f"The transcripts are stored in {recog_path}") # The following prints out WERs, per-word error statistics and aligned @@ -570,7 +570,11 @@ def save_results( ) with open(errs_filename, "w") as f: wer = write_error_stats( - f, f"{test_set_name}-{key}", results, enable_log=True + f, + f"{test_set_name}-{key}", + results, + enable_log=True, + compute_CER=True, ) test_set_wers[key] = wer From f9577510abcb055f9dd2bc97cf08f5d692dfdcb5 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 18 Mar 2024 11:20:13 +0800 Subject: [PATCH 2/2] Update egs/aishell/ASR/conformer_mmi/decode.py Co-authored-by: Fangjun Kuang --- egs/aishell/ASR/conformer_mmi/decode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/aishell/ASR/conformer_mmi/decode.py b/egs/aishell/ASR/conformer_mmi/decode.py index a197a455c6..8a2daa93e8 100755 --- a/egs/aishell/ASR/conformer_mmi/decode.py +++ b/egs/aishell/ASR/conformer_mmi/decode.py @@ -448,7 +448,7 @@ def save_results( f"{test_set_name}-{key}", results_char, enable_log=enable_log, - cer=True, + compute_CER=True, ) test_set_wers[key] = wer