From 15cee34c3006d4bc50f86ed1d56636df92b0ff89 Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Mon, 7 Mar 2022 17:05:42 -0500 Subject: [PATCH 1/7] fix bug in mt/st templates for having separate token lists --- egs2/TEMPLATE/mt1/mt.sh | 2 +- egs2/TEMPLATE/st1/st.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs2/TEMPLATE/mt1/mt.sh b/egs2/TEMPLATE/mt1/mt.sh index 6164c155558..775193768f2 100755 --- a/egs2/TEMPLATE/mt1/mt.sh +++ b/egs2/TEMPLATE/mt1/mt.sh @@ -299,7 +299,7 @@ if "${token_joint}"; then src_bpetoken_list="${tgt_bpetoken_list}" src_chartoken_list="${tgt_chartoken_list}" else - src_bpedir="${token_listdir}/src_bpe_${tgt_bpemode}${tgt_nbpe}" + src_bpedir="${token_listdir}/src_bpe_${src_bpemode}${src_nbpe}" src_bpeprefix="${src_bpedir}"/bpe src_bpemodel="${src_bpeprefix}".model src_bpetoken_list="${src_bpedir}"/tokens.txt diff --git a/egs2/TEMPLATE/st1/st.sh b/egs2/TEMPLATE/st1/st.sh index 93ffe4d3cf5..00861c0d551 100755 --- a/egs2/TEMPLATE/st1/st.sh +++ b/egs2/TEMPLATE/st1/st.sh @@ -335,7 +335,7 @@ if "${token_joint}"; then src_bpetoken_list="${tgt_bpetoken_list}" src_chartoken_list="${tgt_chartoken_list}" else - src_bpedir="${token_listdir}/src_bpe_${tgt_bpemode}${tgt_nbpe}" + src_bpedir="${token_listdir}/src_bpe_${src_bpemode}${src_nbpe}" src_bpeprefix="${src_bpedir}"/bpe src_bpemodel="${src_bpeprefix}".model src_bpetoken_list="${src_bpedir}"/tokens.txt From 313ee817585f315035e41817988f7527af91bdf9 Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Mon, 7 Mar 2022 22:48:07 -0500 Subject: [PATCH 2/7] fix bug for having separate src and tgt vocab sizes; we need separate sos and eos ids --- espnet2/st/espnet_model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index f4d59d1a0cc..ab8fc9e915f 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -78,6 +78,8 @@ def __init__( # note that eos is the same as sos (equivalent ID) self.sos = vocab_size - 1 self.eos = vocab_size - 1 + self.src_sos = src_vocab_size - 1 + self.src_eos = src_vocab_size - 1 self.vocab_size = vocab_size self.src_vocab_size = src_vocab_size self.ignore_id = ignore_id @@ -409,7 +411,7 @@ def _calc_asr_att_loss( ys_pad: torch.Tensor, ys_pad_lens: torch.Tensor, ): - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.sos, self.eos, self.ignore_id) + ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.src_sos, self.src_eos, self.ignore_id) ys_in_lens = ys_pad_lens + 1 # 1. Forward decoder @@ -420,7 +422,7 @@ def _calc_asr_att_loss( # 2. Compute attention loss loss_att = self.criterion_asr(decoder_out, ys_out_pad) acc_att = th_accuracy( - decoder_out.view(-1, self.vocab_size), + decoder_out.view(-1, self.src_vocab_size), ys_out_pad, ignore_label=self.ignore_id, ) From 60d13e55e5a927735fd14bcb41bbe1969d012825 Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Mon, 7 Mar 2022 23:04:43 -0500 Subject: [PATCH 3/7] add show st result --- .../asr1/scripts/utils/show_st_result.sh | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100755 egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh diff --git a/egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh b/egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh new file mode 100755 index 00000000000..1b7f6714293 --- /dev/null +++ b/egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +mindepth=0 +maxdepth=3 +case=tc + +. utils/parse_options.sh + +if [ $# -gt 1 ]; then + echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2 + echo "" + echo "Show the system environments and the evaluation results in Markdown format." + echo 'The default of is "exp/".' + exit 1 +fi + +[ -f ./path.sh ] && . ./path.sh +set -euo pipefail +if [ $# -eq 1 ]; then + exp=$1 +else + exp=exp +fi + + +cat << EOF + +# RESULTS +## Environments +- date: \`$(LC_ALL=C date)\` +EOF + +python3 << EOF +import sys, espnet, torch +pyversion = sys.version.replace('\n', ' ') + +print(f"""- python version: \`{pyversion}\` +- espnet version: \`espnet {espnet.__version__}\` +- pytorch version: \`pytorch {torch.__version__}\`""") +EOF + +cat << EOF +- Git hash: \`$(git rev-parse HEAD)\` + - Commit date: \`$(git log -1 --format='%cd')\` + +EOF + +while IFS= read -r expdir; do + if ls "${expdir}"/*/*/score_*/result.${case}.txt &> /dev/null; then + echo "## $(basename ${expdir})" + for type in bleu; do + cat << EOF +### ${type^^} + +|dataset|bleu_score|verbose_score| +|---|---|---| +EOF + data=$(echo "${expdir}"/*/*/score_*/result.${case}.txt | cut -d '/' -f4) + bleu=$(sed -n '5p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3 | tr -d '[,]') + verbose=$(sed -n '7p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3- | tr -d '[",]') + echo "${data}|${bleu}|${verbose}" + + done + fi + +done < <(find ${exp} -mindepth ${mindepth} -maxdepth ${maxdepth} -type d) From 88be0d6cca8a26783aa31dee85ead3b1605523ec Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Fri, 18 Mar 2022 17:54:58 -0400 Subject: [PATCH 4/7] rename show_translation_result --- .../scripts/utils/show_translation_result.sh | 65 +++++++++++++++++++ egs2/TEMPLATE/st1/st.sh | 2 +- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100755 egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh diff --git a/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh b/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh new file mode 100755 index 00000000000..1b7f6714293 --- /dev/null +++ b/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +mindepth=0 +maxdepth=3 +case=tc + +. utils/parse_options.sh + +if [ $# -gt 1 ]; then + echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2 + echo "" + echo "Show the system environments and the evaluation results in Markdown format." + echo 'The default of is "exp/".' + exit 1 +fi + +[ -f ./path.sh ] && . ./path.sh +set -euo pipefail +if [ $# -eq 1 ]; then + exp=$1 +else + exp=exp +fi + + +cat << EOF + +# RESULTS +## Environments +- date: \`$(LC_ALL=C date)\` +EOF + +python3 << EOF +import sys, espnet, torch +pyversion = sys.version.replace('\n', ' ') + +print(f"""- python version: \`{pyversion}\` +- espnet version: \`espnet {espnet.__version__}\` +- pytorch version: \`pytorch {torch.__version__}\`""") +EOF + +cat << EOF +- Git hash: \`$(git rev-parse HEAD)\` + - Commit date: \`$(git log -1 --format='%cd')\` + +EOF + +while IFS= read -r expdir; do + if ls "${expdir}"/*/*/score_*/result.${case}.txt &> /dev/null; then + echo "## $(basename ${expdir})" + for type in bleu; do + cat << EOF +### ${type^^} + +|dataset|bleu_score|verbose_score| +|---|---|---| +EOF + data=$(echo "${expdir}"/*/*/score_*/result.${case}.txt | cut -d '/' -f4) + bleu=$(sed -n '5p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3 | tr -d '[,]') + verbose=$(sed -n '7p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3- | tr -d '[",]') + echo "${data}|${bleu}|${verbose}" + + done + fi + +done < <(find ${exp} -mindepth ${mindepth} -maxdepth ${maxdepth} -type d) diff --git a/egs2/TEMPLATE/st1/st.sh b/egs2/TEMPLATE/st1/st.sh index 00861c0d551..dc84ce32c25 100755 --- a/egs2/TEMPLATE/st1/st.sh +++ b/egs2/TEMPLATE/st1/st.sh @@ -1551,7 +1551,7 @@ if ! "${skip_eval}"; then done # Show results in Markdown syntax - scripts/utils/show_st_result.sh --case $tgt_case "${st_exp}" > "${st_exp}"/RESULTS.md + scripts/utils/show_translation_result.sh --case $tgt_case "${st_exp}" > "${st_exp}"/RESULTS.md cat "${cat_exp}"/RESULTS.md fi else From 33d6ac17c10848a460232787fb5d521e4bcd05b3 Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Mon, 4 Apr 2022 12:04:05 -0400 Subject: [PATCH 5/7] fix ci --- .../asr1/scripts/utils/show_st_result.sh | 65 ------------------- .../scripts/utils/show_translation_result.sh | 4 +- 2 files changed, 2 insertions(+), 67 deletions(-) delete mode 100755 egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh diff --git a/egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh b/egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh deleted file mode 100755 index 1b7f6714293..00000000000 --- a/egs2/TEMPLATE/asr1/scripts/utils/show_st_result.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -mindepth=0 -maxdepth=3 -case=tc - -. utils/parse_options.sh - -if [ $# -gt 1 ]; then - echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2 - echo "" - echo "Show the system environments and the evaluation results in Markdown format." - echo 'The default of is "exp/".' - exit 1 -fi - -[ -f ./path.sh ] && . ./path.sh -set -euo pipefail -if [ $# -eq 1 ]; then - exp=$1 -else - exp=exp -fi - - -cat << EOF - -# RESULTS -## Environments -- date: \`$(LC_ALL=C date)\` -EOF - -python3 << EOF -import sys, espnet, torch -pyversion = sys.version.replace('\n', ' ') - -print(f"""- python version: \`{pyversion}\` -- espnet version: \`espnet {espnet.__version__}\` -- pytorch version: \`pytorch {torch.__version__}\`""") -EOF - -cat << EOF -- Git hash: \`$(git rev-parse HEAD)\` - - Commit date: \`$(git log -1 --format='%cd')\` - -EOF - -while IFS= read -r expdir; do - if ls "${expdir}"/*/*/score_*/result.${case}.txt &> /dev/null; then - echo "## $(basename ${expdir})" - for type in bleu; do - cat << EOF -### ${type^^} - -|dataset|bleu_score|verbose_score| -|---|---|---| -EOF - data=$(echo "${expdir}"/*/*/score_*/result.${case}.txt | cut -d '/' -f4) - bleu=$(sed -n '5p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3 | tr -d '[,]') - verbose=$(sed -n '7p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3- | tr -d '[",]') - echo "${data}|${bleu}|${verbose}" - - done - fi - -done < <(find ${exp} -mindepth ${mindepth} -maxdepth ${maxdepth} -type d) diff --git a/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh b/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh index 1b7f6714293..09ed4abccdf 100755 --- a/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh +++ b/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh @@ -55,8 +55,8 @@ while IFS= read -r expdir; do |---|---|---| EOF data=$(echo "${expdir}"/*/*/score_*/result.${case}.txt | cut -d '/' -f4) - bleu=$(sed -n '5p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3 | tr -d '[,]') - verbose=$(sed -n '7p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3- | tr -d '[",]') + bleu=$(sed -n '5p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3 | tr -d ',') + verbose=$(sed -n '7p' "${expdir}"/*/*/score_*/result.${case}.txt | cut -d ' ' -f 3- | tr -d '",') echo "${data}|${bleu}|${verbose}" done From 7b1c3e4c69eff66c8ccd2272fb5ad5db05872491 Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Wed, 6 Apr 2022 10:08:25 -0400 Subject: [PATCH 6/7] fix bash --- egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh b/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh index 09ed4abccdf..c1c1bdf0882 100755 --- a/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh +++ b/egs2/TEMPLATE/asr1/scripts/utils/show_translation_result.sh @@ -44,10 +44,12 @@ cat << EOF EOF +metrics="bleu" + while IFS= read -r expdir; do if ls "${expdir}"/*/*/score_*/result.${case}.txt &> /dev/null; then echo "## $(basename ${expdir})" - for type in bleu; do + for type in $metrics; do cat << EOF ### ${type^^} From eefbe01d6ad127663bf5237ac5a24cd681ce2fb1 Mon Sep 17 00:00:00 2001 From: Brian Yan Date: Tue, 12 Apr 2022 15:35:42 -0400 Subject: [PATCH 7/7] black --- espnet2/st/espnet_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index ab8fc9e915f..eb4a707f6ca 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -411,7 +411,9 @@ def _calc_asr_att_loss( ys_pad: torch.Tensor, ys_pad_lens: torch.Tensor, ): - ys_in_pad, ys_out_pad = add_sos_eos(ys_pad, self.src_sos, self.src_eos, self.ignore_id) + ys_in_pad, ys_out_pad = add_sos_eos( + ys_pad, self.src_sos, self.src_eos, self.ignore_id + ) ys_in_lens = ys_pad_lens + 1 # 1. Forward decoder