diff --git a/ci/install.sh b/ci/install.sh
index eeb531d7ddd..5bfed7584ad 100755
--- a/ci/install.sh
+++ b/ci/install.sh
@@ -21,7 +21,7 @@ ${CXX:-g++} -v
     . ./activate_python.sh
     make TH_VERSION="${TH_VERSION}"
 
-    make warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq pyopenjtalk.done py3mmseg.done s3prl.done transformers.done phonemizer.done fairseq.done k2.done gtn.done
+    make warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq pyopenjtalk.done py3mmseg.done s3prl.done transformers.done phonemizer.done fairseq.done k2.done gtn.done longformer.done
     rm -rf kaldi
 )
 . tools/activate_python.sh
diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/score_summarization.py b/egs2/TEMPLATE/asr1/pyscripts/utils/score_summarization.py
new file mode 100644
index 00000000000..35202f1ce88
--- /dev/null
+++ b/egs2/TEMPLATE/asr1/pyscripts/utils/score_summarization.py
@@ -0,0 +1,63 @@
+"""Score summarization hypotheses with ROUGE, METEOR and BERTScore.
+
+Usage: score_summarization.py REF_FILE HYP_FILE
+
+Both files hold one "KEY TEXT" entry per line. Per-utterance METEOR and
+ROUGE-L are printed first, followed by one "RESULT" line carrying ROUGE-1,
+ROUGE-2, ROUGE-L, METEOR and BERTScore precision, each scaled by 100.
+"""
+import sys
+import os
+from datasets import load_metric
+import numpy as np
+from nlgeval import compute_metrics
+from nlgeval import NLGEval
+
+
+def read_text(path):
+    """Return a {key: text} dict built from the "KEY TEXT" lines of `path`."""
+    entries = {}
+    with open(path, "r", encoding="utf-8") as f:
+        for line in f:
+            # Only the first space separates the key from the sentence.
+            key, _, text = line.strip().partition(" ")
+            entries[key] = text
+    return entries
+
+
+if len(sys.argv) < 3:
+    sys.exit(f"Usage: {sys.argv[0]} REF_FILE HYP_FILE")
+
+ref_dict = read_text(sys.argv[1])
+hyp_dict = read_text(sys.argv[2])
+
+# The hypothesis file decides which utterances are scored, and their order.
+keys = list(hyp_dict)
+labels = [ref_dict[k] for k in keys]
+decoded_preds = [hyp_dict[k] for k in keys]
+
+metric = load_metric("bertscore")
+result_bert = metric.compute(
+    predictions=decoded_preds,
+    references=labels,
+    lang="en",
+)
+
+
+nlg = NLGEval()  # loads the models
+print("Key", "\t", "METEOR", "\t", "ROUGE-L")
+for (key, ref, hyp) in zip(keys, labels, decoded_preds):
+    metrics_dict = nlg.compute_individual_metrics([ref], hyp)
+    print(key, "\t", metrics_dict["METEOR"], "\t", metrics_dict["ROUGE_L"])
+metrics_dict = nlg.compute_metrics(ref_list=[labels], hyp_list=decoded_preds)
+metric = load_metric("rouge")
+result = metric.compute(predictions=decoded_preds, references=labels)
+result = {key: value.mid.fmeasure * 100 for key, value in result.items()}
+
+# Two adjacent f-strings rather than a backslash continuation inside one
+# literal: a continuation embeds the next line's indentation in the output,
+# and show_asr_result.sh splits this line into table columns on spaces.
+print(
+    f"RESULT {result['rouge1']} {result['rouge2']} {result['rougeL']} "
+    f"{metrics_dict['METEOR']*100.0} {100*np.mean(result_bert['precision'])}"
+)
diff --git a/egs2/TEMPLATE/asr1/scripts/utils/show_asr_result.sh b/egs2/TEMPLATE/asr1/scripts/utils/show_asr_result.sh
index afa768bf5d5..9b8abb9d658 100755
--- a/egs2/TEMPLATE/asr1/scripts/utils/show_asr_result.sh
+++ b/egs2/TEMPLATE/asr1/scripts/utils/show_asr_result.sh
@@ -44,7 +44,18 @@ cat << EOF
 EOF
 
 while IFS= read -r expdir; do
-    if ls "${expdir}"/*/*/score_*/result.txt &> /dev/null; then
+
+    # Summarization scores (result.sum) are checked before the ASR score files.
+    if ls "${expdir}"/*/*/result.sum &> /dev/null; then
+        echo "## $(basename "${expdir}")"
+        cat << EOF
+|dataset|ROUGE-1|ROUGE-2|ROUGE-L|METEOR|BERTScore|
+|---|---|---|---|---|---|
+EOF
+        # Squeeze runs of spaces so every value lands in exactly one column.
+        grep -H -e "RESULT" "${expdir}"/*/*/result.sum | sed 's=RESULT==g' | tr -s ' ' '|'
+        echo
+    elif ls "${expdir}"/*/*/score_*/result.txt &> /dev/null; then
         echo "## $(basename ${expdir})"
         for type in wer cer ter; do
             cat << EOF
diff --git a/egs2/how2/asr1/cmd.sh b/egs2/how2/asr1/cmd.sh
deleted file mode 100644
index 2aae6919fef..00000000000
--- a/egs2/how2/asr1/cmd.sh
+++ /dev/null
@@ -1,110 +0,0 @@
-# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
-# Usage: .pl [options] JOB=1:
-# e.g.
-# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
-#
-# Options:
-# --time