forked from espnet/espnet
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request espnet#4071 from roshansh-cmu/summ
Restricted Self Attention for E2E Speech Summarization
- Loading branch information
Showing
72 changed files
with
1,524 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import sys | ||
import os | ||
from datasets import load_metric | ||
import numpy as np | ||
from nlgeval import compute_metrics | ||
from nlgeval import NLGEval | ||
|
||
|
||
def read_keyed_text(path):
    """Read a ``<key> <text>`` file into a dict, preserving file order.

    Each non-blank line is stripped and split at its first space: the part
    before it is the key, everything after it is the text (empty when the
    line holds only a key). Blank lines are skipped. If a key occurs more
    than once, the last occurrence wins.

    Args:
        path: Path of the text file to read (decoded as UTF-8).

    Returns:
        Dict mapping each key to its text, in order of first appearance.
    """
    entries = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            key, _, text = line.partition(" ")
            entries[key] = text
    return entries


def main(argv=None):
    """Score hypothesis summaries against references and print the metrics.

    Prints one tab-separated line per key with its METEOR and ROUGE-L
    values, then a final ``RESULT`` line holding ROUGE-1, ROUGE-2, ROUGE-L,
    METEOR and the mean BERTScore precision, each scaled by 100.

    Args:
        argv: Argument vector ``[prog, ref_file, hyp_file]``; defaults to
            ``sys.argv``.

    Raises:
        SystemExit: If fewer than two file arguments are given.
        KeyError: If a hypothesis key has no matching reference.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else "script"
        sys.exit(f"Usage: {prog} <ref_file> <hyp_file>")

    ref_dict = read_keyed_text(argv[1])
    hyp_dict = read_keyed_text(argv[2])

    # Fail early, before any model is loaded, if the two files do not line up.
    missing = [k for k in hyp_dict if k not in ref_dict]
    if missing:
        raise KeyError(
            f"{len(missing)} hypothesis key(s) missing from reference file, "
            f"e.g. {missing[0]!r}"
        )

    # Only keys present in the hypothesis file are scored.
    keys = list(hyp_dict)
    labels = [ref_dict[k] for k in keys]
    decoded_preds = [hyp_dict[k] for k in keys]

    bertscore = load_metric("bertscore")
    result_bert = bertscore.compute(
        predictions=decoded_preds,
        references=labels,
        lang="en",
    )

    nlg = NLGEval()  # loads the models
    print("Key", "\t", "METEOR", "\t", "ROUGE-L")
    for key, ref, hyp in zip(keys, labels, decoded_preds):
        metrics_dict = nlg.compute_individual_metrics([ref], hyp)
        print(key, "\t", metrics_dict["METEOR"], "\t", metrics_dict["ROUGE_L"])

    # Corpus-level scores: a single reference stream aligned with the
    # hypotheses.
    metrics_dict = nlg.compute_metrics(ref_list=[labels], hyp_list=decoded_preds)
    rouge = load_metric("rouge")
    result = rouge.compute(predictions=decoded_preds, references=labels)
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}

    # NOTE: the BERTScore column reports mean precision, not F1.
    print(
        f"RESULT {result['rouge1']} {result['rouge2']} {result['rougeL']} "
        f"{metrics_dict['METEOR']*100.0} {100*np.mean(result_bert['precision'])}"
    )


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../TEMPLATE/asr1/cmd.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
## End to End Speech Recognition | ||
|
||
This recipe can be used to build E2E Speech Summarization models using restricted self-attention on the HowTo corpus of instructional videos. | ||
|
||
HowTo 2000h fbank-pitch features have been released to enable reproduction of this recipe. | ||
|
||
# Results on ASR | ||
|
||
|
||
## asr_base_conformer_lf_mix | ||
### WER | ||
|
||
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| | ||
|---|---|---|---|---|---|---|---|---| | ||
|decode_asr_model_valid.acc.best/dev5_test|3016|55215|93.1|4.8|2.1|1.9|8.8|56.7| | ||
|decode_asr_model_valid.acc.best/held_out_test|2761|47348|92.7|5.0|2.3|2.2|9.5|54.6| | ||
|
||
### CER | ||
|
||
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| | ||
|---|---|---|---|---|---|---|---|---| | ||
|decode_asr_model_valid.acc.best/dev5_test|3016|276377|97.1|1.1|1.9|1.9|4.8|56.7| | ||
|decode_asr_model_valid.acc.best/held_out_test|2761|236575|96.8|1.2|2.0|2.1|5.4|54.6| | ||
|
||
### TER | ||
|
||
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| | ||
|---|---|---|---|---|---|---|---|---| | ||
|decode_asr_model_valid.acc.best/dev5_test|3016|82484|94.1|3.5|2.4|2.2|8.0|56.7| | ||
|decode_asr_model_valid.acc.best/held_out_test|2761|70264|93.9|3.7|2.4|2.7|8.9|54.6| |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../TEMPLATE/asr1/asr.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../TEMPLATE/asr1/cmd.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
tuning/decode_ctc.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
--sample-frequency=16000 | ||
--num-mel-bins=80 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Default configuration | ||
command qsub -V -v PATH -S /bin/bash | ||
option name=* -N $0 | ||
option mem=* -l mem=$0 | ||
option mem=0 # Do not add anything to qsub_opts | ||
option num_threads=* -l ncpus=$0 | ||
option num_threads=1 # Do not add anything to qsub_opts | ||
option num_nodes=* -l nodes=$0:ppn=1 | ||
default gpu=0 | ||
option gpu=0 | ||
option gpu=* -l ngpus=$0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
--sample-frequency=16000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Default configuration | ||
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* | ||
option name=* -N $0 | ||
option mem=* -l mem_free=$0,ram_free=$0 | ||
option mem=0 # Do not add anything to qsub_opts | ||
option num_threads=* -pe smp $0 | ||
option num_threads=1 # Do not add anything to qsub_opts | ||
option max_jobs_run=* -tc $0 | ||
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1 | ||
default gpu=0 | ||
option gpu=0 | ||
option gpu=* -l gpu=$0 -q g.q |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Default configuration | ||
command sbatch --export=PATH | ||
option name=* --job-name $0 | ||
option time=* --time $0 | ||
option mem=* --mem-per-cpu $0 | ||
option mem=0 | ||
option num_threads=* --cpus-per-task $0 | ||
option num_threads=1 --cpus-per-task 1 | ||
option num_nodes=* --nodes $0 | ||
default gpu=0 | ||
option gpu=0 -p cpu | ||
option gpu=* -p gpu --gres=gpu:$0 -c $0 # Recommend allocating at least as many CPUs as GPUs | ||
# note: the --max-jobs-run option is supported as a special case | ||
# by slurm.pl and you don't have to handle it in the config file. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
tuning/train_asr_conformer_vid_ctc_lf.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
beam_size: 4 | ||
batch_size: 1 | ||
penalty: 0.0 | ||
minlenratio: 0.0 | ||
maxlenratio: 0.0 | ||
ctc_weight: 0.3 | ||
lm_weight: 0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
beam_size: 20 | ||
batch_size: 1 | ||
penalty: 0.1 | ||
minlenratio: 0.0 | ||
maxlenratio: 0.0 | ||
ctc_weight: 1.0 | ||
lm_weight: 0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
grad_clip: 5.0 | ||
batch_type: numel | ||
batch_bins: 150000000 | ||
accum_grad: 1 | ||
max_epoch: 400 | ||
patience: none | ||
# Use self-defined function for initialization | ||
init: xavier_uniform | ||
best_model_criterion: | ||
- - valid | ||
- acc | ||
- max | ||
keep_nbest_models: 10 | ||
|
||
input_size: 768 | ||
encoder: avhubert_pretrain | ||
encoder_conf: | ||
output_size: 768 | ||
linear_units: 3072 | ||
attention_heads: 8 | ||
num_blocks: 12 | ||
dropout_rate: 0.1 | ||
attention_dropout_rate: 0.0 | ||
dropout_input: 0.1 | ||
dropout_features: 0.1 | ||
skip_masked: false | ||
skip_nomask: false | ||
mask_prob: 0.80 | ||
extractor_mode: default | ||
conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' | ||
final_dim: 256 | ||
encoder_layerdrop: 0.05 | ||
feature_grad_mult: 0.1 | ||
untie_final_proj: true | ||
label_rate: 100 | ||
sample_rate: 16000 | ||
|
||
model_conf: | ||
lsm_weight: 0.1 | ||
length_normalized_loss: false | ||
pred_masked_weight: 1.0 | ||
pred_nomask_weight: 0.0 | ||
loss_weights: 10.0 | ||
|
||
optim: adam | ||
optim_conf: | ||
lr: 0.0005 | ||
scheduler: warmuplr | ||
scheduler_conf: | ||
warmup_steps: 25000 | ||
|
||
unused_parameters: true | ||
|
||
frontend: null | ||
|
||
normalize: null | ||
|
||
specaug: null |
Oops, something went wrong.