diff --git a/egs/librispeech/s5/local/data_prep.sh b/egs/librispeech/s5/local/data_prep.sh index 5a264a07464..dea93525e28 100755 --- a/egs/librispeech/s5/local/data_prep.sh +++ b/egs/librispeech/s5/local/data_prep.sh @@ -33,7 +33,7 @@ utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender utt2dur=$dst/utt2dur; [[ -f "$utt2dur" ]] && rm $utt2dur -for reader_dir in $(find $src -mindepth 1 -maxdepth 1 -type d | sort); do +for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do reader=$(basename $reader_dir) if ! [ $reader -eq $reader ]; then # not integer. echo "$0: unexpected subdirectory name $reader" @@ -53,7 +53,7 @@ for reader_dir in $(find $src -mindepth 1 -maxdepth 1 -type d | sort); do exit 1; fi - find $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ + find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt diff --git a/egs/mini_librispeech/s5/RESULTS b/egs/mini_librispeech/s5/RESULTS new file mode 100755 index 00000000000..463c059bdbb --- /dev/null +++ b/egs/mini_librispeech/s5/RESULTS @@ -0,0 +1,22 @@ +#!/bin/bash + +for x in exp/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done + +for x in exp/chain/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +exit 0 + +# Results on on dev_clean_2 +%WER 49.18 [ 9903 / 20138, 439 ins, 2332 del, 7132 sub ] exp/mono/decode_nosp_tgsmall_dev_clean_2/wer_8_0.0 +%WER 20.42 [ 4113 / 20138, 469 ins, 545 del, 3099 sub ] exp/tri1/decode_nosp_tglarge_dev_clean_2/wer_14_0.0 +%WER 24.56 [ 4945 / 20138, 447 ins, 792 del, 3706 sub ] exp/tri1/decode_nosp_tgmed_dev_clean_2/wer_14_0.0 +%WER 27.37 [ 5512 / 20138, 425 ins, 969 del, 4118 sub ] exp/tri1/decode_nosp_tgsmall_dev_clean_2/wer_14_0.0 +%WER 18.59 [ 3743 / 20138, 435 ins, 517 del, 2791 sub ] exp/tri2b/decode_nosp_tglarge_dev_clean_2/wer_15_0.0 +%WER 22.06 [ 4443 / 20138, 400 ins, 748 del, 3295 sub ] exp/tri2b/decode_nosp_tgmed_dev_clean_2/wer_15_0.0 +%WER 24.32 [ 4898 / 20138, 413 ins, 899 del, 3586 sub ] exp/tri2b/decode_nosp_tgsmall_dev_clean_2/wer_15_0.0 +%WER 13.45 [ 2708 / 20138, 358 ins, 330 del, 2020 sub ] exp/tri3b/decode_nosp_tglarge_dev_clean_2/wer_17_0.0 +%WER 16.25 [ 3273 / 20138, 332 ins, 485 del, 2456 sub ] exp/tri3b/decode_nosp_tgmed_dev_clean_2/wer_16_0.0 +%WER 18.10 [ 3645 / 20138, 332 ins, 603 del, 2710 sub ] exp/tri3b/decode_nosp_tgsmall_dev_clean_2/wer_16_0.0 + + +%WER 18.58 [ 3742 / 20138, 366 ins, 763 del, 2613 sub ] exp/chain/tdnn1a_sp/decode_tgsmall_dev_clean_2/wer_10_0.0 +%WER 13.35 [ 2689 / 20138, 318 ins, 491 del, 1880 sub ] exp/chain/tdnn1a_sp/decode_tglarge_dev_clean_2/wer_9_0.5 diff --git a/egs/mini_librispeech/s5/cmd.sh b/egs/mini_librispeech/s5/cmd.sh new file mode 100644 index 00000000000..71dd849a93b --- /dev/null +++ b/egs/mini_librispeech/s5/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. 
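# If you have no grid at all, a minimal local-machine setup (an
# illustration, not this recipe's default) is to point the same variables
# at run.pl, which runs jobs on the local machine and does not enforce
# resource options such as --mem:
#   export train_cmd="run.pl"
#   export decode_cmd="run.pl"
#   export mkgraph_cmd="run.pl"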
Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/mini_librispeech/s5/conf/mfcc.conf b/egs/mini_librispeech/s5/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/mini_librispeech/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/mini_librispeech/s5/conf/mfcc_hires.conf b/egs/mini_librispeech/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/mini_librispeech/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. +--high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600) diff --git a/egs/mini_librispeech/s5/conf/online_cmvn.conf b/egs/mini_librispeech/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/mini_librispeech/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/mini_librispeech/s5/local/chain/compare_wer.sh b/egs/mini_librispeech/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..cd6be14ed88 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/compare_wer.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/mini_librispeech/s5/local/chain/run_tdnn.sh b/egs/mini_librispeech/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..642c20ec191 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,298 @@ +#!/bin/bash + +# This is a basic TDNN experiment. 
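# It is normally run via the local/chain/run_tdnn.sh symlink from the
# top-level run.sh, but it can also be invoked directly; one possible
# invocation (using this script's own defaults, shown here only as an
# illustration) is:
#   local/chain/tuning/run_tdnn_1a.sh --stage 0 --train-set train_clean_5 --gmm tri3b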
+ +# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp +# exp/chain/tdnn1a_sp: num-iters=6 nj=2..5 num-params=7.0M dim=40+100->2309 combine=-0.072->-0.069 xent:train/valid[3,5,final]=(-2.10,-1.62,-1.48/-2.26,-1.85,-1.77) logprob:train/valid[3,5,final]=(-0.096,-0.069,-0.060/-0.124,-0.107,-0.104) + +# local/chain/compare_wer.sh --online exp/chain/tdnn1a_sp +# System tdnn1a_sp +#WER dev_clean_2 (tgsmall) 18.58 +# [online:] 18.49 +#WER dev_clean_2 (tglarge) 13.35 +# [online:] 13.47 +# Final train prob -0.0596 +# Final valid prob -0.1036 +# Final train prob (xent) -1.4843 +# Final valid prob (xent) -1.7723 + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..82bb46d64a9 --- /dev/null +++ b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train_clean_5 +test_sets="dev_clean_2" +gmm=tri3b + +nnet3_affix= + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. 
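# (Illustration with made-up numbers: if ${train_set}_sp_hires contained
# 4000 utterances, roughly 4000 / 4 = 1000 of them would be subsetted for
# training the diagonal UBM.)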
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat 
+ + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done + +# Note: the double level of quoting for the sed command +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done + +exit 0; diff --git a/egs/mini_librispeech/s5/local/subset_dataset.sh b/egs/mini_librispeech/s5/local/subset_dataset.sh new file mode 100755 index 00000000000..050128247a4 --- /dev/null +++ b/egs/mini_librispeech/s5/local/subset_dataset.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez) +# Apache 2.0 + +# The following commands were used to generate the mini_librispeech dataset: +# +# Note that data generation is random. This could be fixed by +# providing a seed argument to the shuf program. + +if [ "$#" -ne 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/a05/dgalvez/LibriSpeech/train-clean-100 \\ + /export/a05/dgalvez/LibriSpeech/train-clean-5 5" + exit 1 +fi + +src_dir=$1 +dest_dir=$2 +dest_num_hours=$3 + +src=$(basename $src_dir) +dest=$(basename $dest_dir) +librispeech_dir=$(dirname $src_dir) + +# TODO: Possibly improve this to ensure gender balance and speaker +# balance. +# TODO: Use actual time values instead of assuming that to make sure we get $dest_num_hours of data +src_num_hours=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | awk -F'|' '{ print $3 }' | \ +python -c ' +from __future__ import print_function +from sys import stdin +minutes_str = stdin.read().split() +print(int(round(sum([float(minutes) for minutes in minutes_str]) / 60.0)))') +src_num_chapters=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | sort -u | wc -l) +mkdir -p data/subset_tmp +grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | \ + shuf -n $(((dest_num_hours * src_num_chapters) / src_num_hours)) > \ + data/subset_tmp/${dest}_chapter_id_list.txt + +while read -r chapter_id || [[ -n "$chapter_id" ]]; do + chapter_dir=$(find $src_dir/ -mindepth 2 -name "$chapter_id" -type d) + speaker_id=$(basename $(dirname $chapter_dir)) + mkdir -p $dest_dir/$speaker_id/ + cp -r $chapter_dir $dest_dir/$speaker_id/ +done < data/subset_tmp/${dest}_chapter_id_list.txt diff --git a/egs/mini_librispeech/s5/path.sh b/egs/mini_librispeech/s5/path.sh new file mode 100644 index 00000000000..705600ad47a --- /dev/null +++ b/egs/mini_librispeech/s5/path.sh @@ -0,0 +1,8 @@ +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. 
$KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C + +# For now, don't include any of the optional dependenices of the main +# librispeech recipe diff --git a/egs/mini_librispeech/s5/run.sh b/egs/mini_librispeech/s5/run.sh new file mode 100755 index 00000000000..bf1ded337f6 --- /dev/null +++ b/egs/mini_librispeech/s5/run.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +# Note: this works only on pre-downloaded data on the CLSP servers +data=/export/a05/dgalvez/ + +data_url=www.openslr.org/resources/TODO # TODO +lm_url=www.openslr.org/resources/11 + +. ./cmd.sh +. ./path.sh + +stage=0 +. utils/parse_options.sh + +# TODO(galv): Reconsider this +set -euxo pipefail + +# TODO(galv): Modify openslr.org to contain the minified training dataset. +# for part in dev-clean-2 train-clean-5; do +# local/download_and_untar.sh $data $data_url $part +# done + +if [ $stage -le 0 ]; then + local/download_lm.sh $lm_url data/local/lm +fi + +if [ $stage -le 1 ]; then + # format the data as Kaldi data directories + for part in dev-clean-2 train-clean-5; do + # use underscore-separated names in data directories. + local/data_prep.sh $data/LibriSpeech/$part data/$(echo $part | sed s/-/_/g) + done + + local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \ + data/local/lm data/local/lm data/local/dict_nosp + + utils/prepare_lang.sh data/local/dict_nosp \ + "" data/local/lang_tmp_nosp data/lang_nosp + + local/format_lms.sh --src-dir data/lang_nosp data/local/lm + # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs + utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \ + data/lang_nosp data/lang_nosp_test_tglarge +fi + +if [ $stage -le 2 ]; then + mfccdir=mfcc + # spread the mfccs over various machines, as this data-set is quite large. + if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then + mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename. + utils/create_split_dir.pl /export/b{07,14,16,17}/$USER/kaldi-data/egs/librispeech/s5/$mfcc/storage \ + $mfccdir/storage + fi + + for part in dev_clean_2 train_clean_5; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/$part exp/make_mfcc/$part $mfccdir + steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir + done + + # Get the shortest 500 utterances first because those are more likely + # to have accurate alignments. + utils/subset_data_dir.sh --shortest data/train_clean_5 500 data/train_500short +fi + +# train a monophone system +if [ $stage -le 3 ]; then + # TODO(galv): Is this too many jobs for a smaller dataset? + steps/train_mono.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \ + data/train_500short data/lang_nosp exp/mono + # TODO: Understand why we use lang_nosp here... 
+ ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/mono exp/mono/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono/graph_nosp_tgsmall \ + data/$test exp/mono/decode_nosp_tgsmall_$test + done + )& + + steps/align_si.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/mono exp/mono_ali_train_clean_5 +fi + +# train a first delta + delta-delta triphone system on all utterances +if [ $stage -le 4 ]; then + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ + 2000 10000 data/train_clean_5 data/lang_nosp exp/mono_ali_train_clean_5 exp/tri1 + + # decode using the tri1 model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri1 exp/tri1/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 5 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \ + data/$test exp/tri1/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test + done + )& + + steps/align_si.sh --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/tri1 exp/tri1_ali_train_clean_5 +fi + +# train an LDA+MLLT system. +if [ $stage -le 5 ]; then + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" 2500 15000 \ + data/train_clean_5 data/lang_nosp exp/tri1_ali_train_clean_5 exp/tri2b + + # decode using the LDA+MLLT model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri2b exp/tri2b/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \ + data/$test exp/tri2b/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test + done + )& + + # Align utts using the tri2b model + steps/align_si.sh --nj 5 --cmd "$train_cmd" --use-graphs true \ + data/train_clean_5 data/lang_nosp exp/tri2b exp/tri2b_ali_train_clean_5 +fi + +# Train tri3b, which is LDA+MLLT+SAT +if [ $stage -le 6 ]; then + steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \ + data/train_clean_5 data/lang_nosp exp/tri2b_ali_train_clean_5 exp/tri3b + + # decode using the tri3b model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri3b exp/tri3b/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_nosp_tgsmall data/$test \ + exp/tri3b/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test + done + )& +fi + +# Now we compute the pronunciation and silence probabilities from training data, +# and re-create the lang directory. 
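# If an earlier run was interrupted, you can resume from roughly this point
# by passing the stage on the command line (it is parsed by
# utils/parse_options.sh near the top of this script), e.g.:
#   ./run.sh --stage 7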
+if [ $stage -le 7 ]; then + steps/get_prons.sh --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/tri3b + utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp \ + exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \ + exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict + + utils/prepare_lang.sh data/local/dict \ + "" data/local/lang_tmp data/lang + + local/format_lms.sh --src-dir data/lang data/local/lm + + utils/build_const_arpa_lm.sh \ + data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge + + steps/align_fmllr.sh --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang exp/tri3b exp/tri3b_ali_train_clean_5 +fi + + +if [ $stage -le 8 ]; then + # Test the tri3b system with the silprobs and pron-probs. + + # decode using the tri3b model + utils/mkgraph.sh data/lang_test_tgsmall \ + exp/tri3b exp/tri3b/graph_tgsmall + for test in dev_clean_2; do + steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgsmall data/$test \ + exp/tri3b/decode_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test exp/tri3b/decode_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test exp/tri3b/decode_{tgsmall,tglarge}_$test + done +fi + +exit 0 # temp + +# Train a chain model +if [ $stage -le 9 ]; then + local/chain/run_tdnn.sh --stage 0 +fi + +# Don't finish until all background decoding jobs are finished. +wait diff --git a/egs/mini_librispeech/s5/steps b/egs/mini_librispeech/s5/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/mini_librispeech/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/mini_librispeech/s5/utils b/egs/mini_librispeech/s5/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/mini_librispeech/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh index d874eb0986a..9d48ec7a898 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -226,7 +226,7 @@ fi if [ $stage -le 16 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi steps/nnet3/chain/train.py --stage=$train_stage \